diff --git a/README.md b/README.md index e96d12c2..a90524f1 100755 --- a/README.md +++ b/README.md @@ -140,6 +140,11 @@ $ ./scripts/build.sh -c ns3 $ ./bin/SimAI_analytical -w example/workload_analytical.txt -g 9216 -g_p_s 8 -r test- -busbw example/busbw.yaml ``` +For calculating bus bandwidth automatically, please try the following command: +```bash +$ ./bin/SimAI_analytical -w ./example/workload_analytical.txt -g 9216 -nv 360 -nic 48.5 -n_p_s 8 -g_p_s 8 -r example- +``` + ## Use SimAI-Simulation ```bash diff --git a/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc b/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc index 6d7a60d1..48fa0da1 100644 --- a/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc +++ b/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc @@ -67,8 +67,8 @@ struct user_param { int main(int argc,char *argv[]) { UserParam* param = UserParam::getInstance(); - if (param->parseArg(argc,argv)) { - std::cerr << "-h, --help Help message" << std::endl; + if (param->parse(argc,argv)) { + std::cerr << "-h, --help Help message" << std::endl; return -1; } param->mode = ModeType::ANALYTICAL; diff --git a/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh b/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh index 72fb4dee..2cb0d3bb 100644 --- a/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh +++ b/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh @@ -32,9 +32,16 @@ #include "Common.hh" #define BUSBW_PATH "" using namespace std; -#include + enum class ModeType { NONE, ASTRA_SIM, MOCKNCCL, ANALYTICAL }; +#include +#include +#include +#include +#include + + struct NetWorkParam{ uint32_t node_num; uint32_t switch_num; @@ -43,29 +50,17 @@ struct NetWorkParam{ uint32_t nvswitch_num; uint32_t gpus_per_server; uint32_t nics_per_server; - uint32_t nvlink_bw; - uint32_t nic_bw; - GPUType gpu_type; - float tp_ar = -1.0f; - float tp_ag = -1.0f; - float tp_rs = -1.0f; - float tp_ata = -1.0f; - float dp_ar = -1.0f; - float dp_ag = -1.0f; - float dp_rs = -1.0f; - float dp_ata = -1.0f; - float ep_ar = -1.0f; - float ep_ag = -1.0f; - float ep_rs = -1.0f; - float ep_ata = -1.0f; - float pp = -1.0f; + float nvlink_bw = -1.0; + float bw_per_nic = -1.0; + char* nic_type = "cx7"; + bool visual = 0; float dp_overlap_ratio = 0; float tp_overlap_ratio = 0; float ep_overlap_ratio = 0; float pp_overlap_ratio = 1; - std::vector NVswitchs; - std::vector> all_gpus; - int visual = 0; + GPUType gpu_type; + std::vectorNVswitchs; + std::vector>all_gpus; }; class UserParam { @@ -82,219 +77,170 @@ private: } public: - int thread; - std::vector gpus; - string workload; - string res = "None"; - int comm_scale; - ModeType mode; - NetWorkParam net_work_param; + int thread; + std::vector gpus; + std::string workload; + std::string res = "None"; + std::string res_folder = "None"; + int comm_scale; + ModeType mode; + NetWorkParam net_work_param; - static UserParam* getInstance(){ - std::lock_guard lock(mtx); - if(instance == nullptr){ - instance = new UserParam(); - } - return instance; - } - - void parseYaml(NetWorkParam& params, const std::string& filename) { - std::ifstream file(BUSBW_PATH + filename); - if (!file) { - std::cerr << "Unable to open file: " << filename << std::endl; - exit(-1); - } - std::string line; - std::string currentSection; - std::getline(file, line); - while (std::getline(file, line)) { - // Remove whitespace - - line.erase(0, line.find_first_not_of(' 
')); - line.erase(line.find_last_not_of(' ') + 1); - if (line.empty() || line[0] == '#') continue; - if (line.back() == ':') { - currentSection = line.substr(0, line.size() - 1); - } else { - std::istringstream ss(line); - std::string key, valueStr; - if (std::getline(ss, key, ':') && ss >> valueStr) { - key.erase(key.find_last_not_of(' ') + 1); - - // Remove part after comma - auto commaPos = key.find(','); - if (commaPos != std::string::npos) { - key = key.substr(0, commaPos); - } + static UserParam* getInstance(){ + std::lock_guard lock(mtx); + if(instance == nullptr){ + instance = new UserParam(); + } + return instance; + } - if (valueStr != "null") { - float value = std::stof(valueStr); - - if (currentSection == "TP") { - if (key == "allreduce") params.tp_ar = value; - else if (key == "allgather") params.tp_ag = value; - else if (key == "reducescatter") params.tp_rs = value; - else if (key == "alltoall") params.tp_ata = value; - } else if (currentSection == "DP") { - if (key == "allreduce") params.dp_ar = value; - else if (key == "allgather") params.dp_ag = value; - else if (key == "reducescatter") params.dp_rs = value; - else if (key == "alltoall") params.dp_ata = value; - } else if (currentSection == "EP") { - if (key == "allreduce") params.ep_ar = value; - else if (key == "allgather") params.ep_ag = value; - else if (key == "reducescatter") params.ep_rs = value; - else if (key == "alltoall") params.ep_ata = value; - } else if (currentSection == "PP") { - if (key == "busbw") params.pp = value; - } - } - } +int parse(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "-h" || arg == "--help") { + std::cout << "-w, --workload Workloads, default none" << std::endl; + std::cout << "-g, --gpus Number of GPUs, default 1" << std::endl; + std::cout << "-g_p_s, --gpus-per-server GPUs per server" << std::endl; + std::cout << "-r, --result Output results path" << std::endl; + std::cout << "-nv, --nvlink Nvlink" << std::endl; + std::cout << "-nic, --nic_busbw NIC busbw" << std::endl; + std::cout << "-n_p_s, --bus-bandwidth Bus bandwidth file" << std::endl; + std::cout << "-nic_t, --nic_type NIC type(cx7,bf3),choose when disable nic " << std::endl; + std::cout << "-g_type, --gpu_type GPU type(A100,H100),choose when disable nvlink " << std::endl; + std::cout << "-v, --visual Enable visual output" << std::endl; + std::cout << "-dp_o, --dp_overlap dp overlap ratio(Default 0)" << std::endl; + std::cout << "-ep_o, --ep_overlap ep overlap ratio(Default 0)" << std::endl; + std::cout << "-tp_o, --tp_overlap tp overlap ratio(Default 0)" << std::endl; + std::cout << "-pp_o, --pp_overlap pp overlap ratio(Default 1)" << std::endl; + return 1; + } else if (arg == "-w" || arg == "--workload") { + if (++i < argc) this->workload = argv[i]; + } else if (arg == "-g" || arg == "--gpus") { + if (++i < argc) this->gpus.push_back(std::stoi(argv[i])); + } else if (arg == "-r" || arg == "--result") { + if (++i < argc) this->res = argv[i]; + } else if (arg == "-r_f" || arg == "--result_folder") { + if (++i < argc) this->res_folder = argv[i]; + } else if (arg == "-g_p_s" || arg == "--gpus-per-server") { + if (++i < argc) this->net_work_param.gpus_per_server = std::stoi(argv[i]); + } else if (arg == "-nv" || arg == "--nvlink") { + if (++i < argc) this->net_work_param.nvlink_bw = std::stof(argv[i]); + } else if (arg == "-nic"|| arg == "--nic_busbw") { + if (++i < argc) this->net_work_param.bw_per_nic = std::stof(argv[i]); + } else if (arg == "-n_p_s" || arg == 
"--nic_per_server") { + if (++i < argc) this->net_work_param.nics_per_server = std::stoi(argv[i]); + } else if (arg == "-nic_t" || arg == "--nic_type") { + if (++i < argc) this->net_work_param.nic_type = argv[i]; + } else if (arg == "-g_type" || arg == "--gpu_type") { + if (++i < argc) { + std::string gpu_type = argv[i]; + if (gpu_type == "A100" || gpu_type == "a100") this->net_work_param.gpu_type = GPUType::A100; + else if (gpu_type == "A800" || gpu_type == "a800" ) this->net_work_param.gpu_type = GPUType::A800; + else if (gpu_type == "H100" || gpu_type == "h100") this->net_work_param.gpu_type = GPUType::H100; + else if (gpu_type == "H800" || gpu_type == "h800") this->net_work_param.gpu_type = GPUType::H800; + else if (gpu_type == "H20" || gpu_type == "h20") this->net_work_param.gpu_type = GPUType::H20; + else this->net_work_param.gpu_type = GPUType::NONE; } + }else if (arg == "-v" || arg == "--visual") { + if (++i < argc) this->net_work_param.visual = std::stoi(argv[i]); + }else if (arg == "--dp_overlap" || arg == "-dp_o") { + if (++i < argc) this->net_work_param.dp_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--tp_overlap" || arg == "-tp_o") { + if (++i < argc) this->net_work_param.tp_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--ep_overlap" || arg == "-ep_o") { + if (++i < argc) this->net_work_param.ep_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--pp_overlap" || arg == "-pp_o") { + if (++i < argc) this->net_work_param.pp_overlap_ratio = std::stof(argv[i]); + } + else { + return 1; } } - void printHelp() const { - std::cout << " ____ _ _ ___ _ _ _ _ _ \n" - << "/ ___|(_)_ __ ___ / \\ |_ _| / \\ _ __ __ _| |_ _| |_(_) ___ __ _| |\n" - << "\\___ \\| | '_ ' _ \\ / _ \\ | |_____ / _ \\ | '_ \\ / _' | | | | | __| |/ __/ _' | |\n" - << " ___) | | | | | | |/ ___ \\ | |_____/ ___ \\| | | | (_| | | |_| | |_| | (_| (_| | |\n" - << "|____/|_|_| |_| |_/_/ \\_\\___| /_/ \\_\\_| |_|\\__,_|_|\\__, |\\__|_|\\___\\__,_|_|\n" - << " |___/ \n"; - std::cout << "-w, --workload Workloads, must set" << std::endl; - std::cout << "-g, --gpus Number of GPUs, default 1" << std::endl; - std::cout << "-g_p_s, --gpus-per-server GPUs per server" << std::endl; - std::cout << "-r, --result Output results path, default: ./results/" << std::endl; - std::cout << "-busbw, --bus-bandwidth Bus bandwidth file, must set" << std::endl; - std::cout << "-v, --visual Enable visual output (Default disable)" << std::endl; - std::cout << "-dp_o, --dp-overlap-ratio DP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-ep_o, --ep-overlap-ratio EP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-tp_o, --tp-overlap-ratio TP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-pp_o, --pp-overlap-ratio PP overlap ratio [float: 0.0-1.0] (Default: 1.0)" << std::endl; - } - - int printError(const std::string& arg) const { - std::cerr << "Error: Missing value for argument '" << arg << "'." << std::endl; - return 1; - } - - int printUnknownOption(const std::string& arg) const { - std::cerr << "Error: Unknown option '" << arg << "'." 
<< std::endl; - return 1; + if (!this->gpus.empty()) { + this->net_work_param.nvswitch_num = this->gpus[0] / this->net_work_param.gpus_per_server; + this->net_work_param.switch_num = 120 + this->net_work_param.gpus_per_server; + this->net_work_param.node_num = this->net_work_param.nvswitch_num + this->net_work_param.switch_num + this->gpus[0]; } - int parseArg(int argc, char *argv[]) { - for (int i = 1; i < argc; ++i) { - std::string arg = argv[i]; - if (arg == "-h" || arg == "--help") { - printHelp(); - return 1; - } else if (arg == "-w" || arg == "--workload") { - if (++i < argc) this->workload = argv[i]; - else return printError(arg); - } else if (arg == "-g" || arg == "--gpus") { - if (++i < argc) this->gpus.push_back(std::stoi(argv[i])); - else return printError(arg); - } else if (arg == "-r" || arg == "--result") { - if (++i < argc) this->res = argv[i]; - else return printError(arg); - } else if (arg == "-g_p_s" || arg == "--gpus-per-server") { - if (++i < argc) this->net_work_param.gpus_per_server = std::stoi(argv[i]); - else return printError(arg); - } else if (arg == "-busbw" || arg == "--bus-bandwidth") { - if (++i < argc) parseYaml(this->net_work_param,argv[i]); - else return printError(arg); - } else if (arg == "--dp-overlap-ratio" || arg == "-dp_o") { - if (++i < argc) this->net_work_param.dp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--tp-overlap-ratio" || arg == "-tp_o") { - if (++i < argc) this->net_work_param.tp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--ep-overlap-ratio" || arg == "-ep_o") { - if (++i < argc) this->net_work_param.ep_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--pp-overlap-ratio" || arg == "-pp_o") { - if (++i < argc) this->net_work_param.pp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "-v" || arg == "--visual") { - this->net_work_param.visual = 1; - } - else { - return printUnknownOption(arg); - } + if (this->res == "None" ){ + std::string full_path = this->workload; + std::string model_info = full_path; + size_t last_slash_pos = full_path.find_last_of('/'); + if (last_slash_pos != std::string::npos) { + model_info = full_path.substr(last_slash_pos + 1); } + std::string model_name; + int world_size = 0, tp = 0, pp = 0, ep = 0, gbs = 0, mbs = 0, seq = 0; - if (!this->gpus.empty()) { - this->net_work_param.nvswitch_num = this->gpus[0] / this->net_work_param.gpus_per_server; - this->net_work_param.switch_num = 120 + this->net_work_param.gpus_per_server; - this->net_work_param.node_num = this->net_work_param.nvswitch_num + this->net_work_param.switch_num + this->gpus[0]; + + size_t world_size_pos = model_info.find("world_size"); + if (world_size_pos != std::string::npos) { + model_name = model_info.substr(0, world_size_pos - 1); } - if (this->res == "None" || this->res.back() == '/'){ - std::string full_path = this->workload; - std::string model_info = full_path; - size_t last_slash_pos = full_path.find_last_of('/'); - if (last_slash_pos != std::string::npos) { - model_info = full_path.substr(last_slash_pos + 1); - } - std::string model_name; - int world_size = 0, tp = 0, pp = 0, ep = 0, gbs = 0, mbs = 0, seq = 0; - size_t world_size_pos = model_info.find("world_size"); - if (world_size_pos != std::string::npos) { - model_name = model_info.substr(0, world_size_pos - 1); + + std::regex param_regex(R"((world_size|tp|pp|ep|gbs|mbs|seq)(\d+))"); + std::smatch matches; + + std::string 
params = model_info; + while (std::regex_search(params, matches, param_regex)) { + std::string param_name = matches[1].str(); + int param_value = std::stoi(matches[2].str()); + + if (param_name == "world_size") { + world_size = param_value; + } else if (param_name == "tp") { + tp = param_value; + } else if (param_name == "pp") { + pp = param_value; + } else if (param_name == "ep") { + ep = param_value; + } else if (param_name == "gbs") { + gbs = param_value; + } else if (param_name == "mbs") { + mbs = param_value; + } else if (param_name == "seq") { + seq = param_value; } - std::regex param_regex(R"((world_size|tp|pp|ep|gbs|mbs|seq)(\d+))"); - std::smatch matches; - - std::string params = model_info; - while (std::regex_search(params, matches, param_regex)) { - std::string param_name = matches[1].str(); - int param_value = std::stoi(matches[2].str()); - - if (param_name == "world_size") { - world_size = param_value; - } else if (param_name == "tp") { - tp = param_value; - } else if (param_name == "pp") { - pp = param_value; - } else if (param_name == "ep") { - ep = param_value; - } else if (param_name == "gbs") { - gbs = param_value; - } else if (param_name == "mbs") { - mbs = param_value; - } else if (param_name == "seq") { - seq = param_value; - } - params = matches.suffix().str(); - } - - int dp = world_size / (tp * pp); - double ga = static_cast(gbs) / (dp * mbs); - - std::ostringstream result; - result << model_name << '-' - << "tp" << tp << '-' - << "pp" << pp << '-' - << "dp" << dp << '-' - << "ga" << static_cast(ga) << '-' - << "ep" << ep << '-' - << "NVL" << this->net_work_param.gpus_per_server << '-' - << "DP" << this->net_work_param.dp_overlap_ratio << '-' ; - if(this->res.back() == '/') { - this->res = this->res + result.str(); - } - else{ - this->res = result.str(); - } + params = matches.suffix().str(); } - return 0; + + + int dp = world_size / (tp * pp); + double ga = static_cast(gbs) / (dp * mbs); + + std::ostringstream result; + result << model_name << '-' + << "tp" << tp << '-' + << "pp" << pp << '-' + << "dp" << dp << '-' + << "ga" << static_cast(ga) << '-' + << "ep" << ep << '-' + << "NVL" << this->net_work_param.gpus_per_server << '-' + << std::fixed << std::setprecision(1) << (this->net_work_param.bw_per_nic * 8) << "G" << '-' + << "DP" << this->net_work_param.dp_overlap_ratio << '-' ; + + this->res = result.str(); + + + } + if (this->res_folder != "None"){ + if (this->res_folder.back() != '/'){ + this->res = this->res_folder + '/' + this->res; + } + else{ + this->res = this->res_folder + this->res; + } + } - ~UserParam(){} + return 0; +} + ~UserParam(){} }; -#endif // __ASTRAPARAMPARSE_HH__ +#endif // __ASTRAPARAMPARSE_HH__ \ No newline at end of file diff --git a/astra-sim-alibabacloud/astra-sim/system/Common.hh b/astra-sim-alibabacloud/astra-sim/system/Common.hh index 766ac231..35ec6d00 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Common.hh +++ b/astra-sim-alibabacloud/astra-sim/system/Common.hh @@ -11,7 +11,7 @@ LICENSE file in the root directory of this source tree. 
#include #include "AstraNetworkAPI.hh" -enum class GPUType { A100, A800, H100, H800, NONE }; +enum class GPUType { A100, A800, H100, H800, NONE, H20}; namespace AstraSim { #define CLOCK_PERIOD 1 diff --git a/astra-sim-alibabacloud/astra-sim/system/Sys.cc b/astra-sim-alibabacloud/astra-sim/system/Sys.cc index 32094747..4f1ab16c 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Sys.cc +++ b/astra-sim-alibabacloud/astra-sim/system/Sys.cc @@ -14,6 +14,7 @@ LICENSE file in the root directory of this source tree. #include "Common.hh" #include "RendezvousRecvData.hh" #include "RendezvousSendData.hh" +#include "calbusbw.h" #include "astra-sim/system/collective/AllToAll.hh" #include "astra-sim/system/collective/DoubleBinaryTreeAllReduce.hh" #include "astra-sim/system/collective/HalvingDoubling.hh" @@ -264,6 +265,11 @@ Sys::Sys( std::atexit(exiting); std::cout << "total nodes: " << total_nodes << std::endl; } + #ifdef ANALYTI + nic_ratio_data = readCSV(NIC_RATIO_PATH); + nvlink_ratio_data = readCSV(NVLINK_RATIO_PATH); + ata_ratio_data = readCSV(ATA_RATIO_PATH); + #endif NI->sim_init(MEM); memBus = new MemBus( "NPU", diff --git a/astra-sim-alibabacloud/astra-sim/system/Sys.hh b/astra-sim-alibabacloud/astra-sim/system/Sys.hh index 8c39cff7..2bbe3fda 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Sys.hh +++ b/astra-sim-alibabacloud/astra-sim/system/Sys.hh @@ -148,7 +148,9 @@ class Sys : public Callable { int total_running_streams; std::map> active_Streams; std::map> stream_priorities; - + std::vector> nic_ratio_data; + std::vector> nvlink_ratio_data; + std::vector> ata_ratio_data; QueueLevels* vLevels; std::map logical_topologies; std::map>> diff --git a/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc b/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc new file mode 100644 index 00000000..403c317c --- /dev/null +++ b/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc @@ -0,0 +1,463 @@ +#include +#include +#include +#include "calbusbw.h" +#include +#include +#include "astra-sim/system/AstraParamParse.hh" +char info[1024] = "Success!"; +int retcode = 0; + +float calculateAlgoBw(CalculationParameters params) { + return 0.0; +} + +float getNvlinkBw(GPUType node_type) { + float nvlink_bw = 0.0; + if (node_type == GPUType::H100 || node_type == GPUType::H20 ) { + nvlink_bw = SM90_NVLINK_BW * H100_NVLINKS; + } else if (node_type == GPUType::H800) { + nvlink_bw = SM90_NVLINK_BW * H800_NVLINKS; + } else if (node_type == GPUType::A100) { + nvlink_bw = SM80_NVLINK_BW * A100_NVLINKS; + } else if (node_type == GPUType::A800) { + nvlink_bw = SM80_NVLINK_BW * A800_NVLINKS; + } else { + strcpy(info, "Warning: unknown machine type. Please choose from H20, H100, H800, A100, A800."); + retcode = 1; + return -1; + } + return nvlink_bw; +} + +float getNicBw(char* nic_type) { + float nic_bw = 0.0; + if (strcmp(nic_type, "CX6") == 0 || strcmp(nic_type, "cx6") == 0) { + nic_bw = CX6_BW; + } else if (strcmp(nic_type, "CX7") == 0 || strcmp(nic_type, "cx7") == 0) { + nic_bw = CX7_BW; + } else if (strcmp(nic_type, "BF3") == 0 || strcmp(nic_type, "bf3") == 0) { + nic_bw = BF3_BW; + } else { + strcpy(info, "Warning: unknown NIC type. 
Please choose from CX6, CX7, BF3."); + retcode = 1; + return -1; + } + return nic_bw; +} + +float calcTreeBusBw(int gpus_per_node, int node_count, float nvlink_bw, float nic_bw, float nics_per_node, float all_gather_bus_bw) { + int nranks = gpus_per_node * node_count; + if (nranks == 1) return 5000.0; + if (node_count == 1) { + return all_gather_bus_bw * (gpus_per_node-1) / gpus_per_node; + } else { + float algbw_nic = nic_bw * nics_per_node; + if (node_count == 2) { + algbw_nic *= 2; + } else if (node_count == 3) { + algbw_nic *= (4.0/3.0); + } + if (gpus_per_node == 1) { + return algbw_nic * (nranks-1) / nranks; + } + float algbw_nvlink = nvlink_bw * gpus_per_node / (gpus_per_node-1); + return (algbw_nic < algbw_nvlink) ? algbw_nic * (nranks-1) / nranks : algbw_nvlink * (nranks-1) / nranks; + } +} + +float calcNVLSBusBw(int gpus_per_node, int node_count, float NVLS_bw, float nic_bw, float nics_per_node) { + int nranks = gpus_per_node * node_count; + + if (gpus_per_node != 8) return -1.0; + float algo_nvls_busbw = NVLS_bw * gpus_per_node / (gpus_per_node-1); + + if (node_count == 1) { + return algo_nvls_busbw * (nranks-1) / nranks; + } else { + float algbw_nic = nic_bw * nics_per_node; + if (node_count == 2) { + algbw_nic *= 2; + } else if (node_count == 3) { + algbw_nic *= (4.0/3.0); + } + if (gpus_per_node == 1) { + return algbw_nic * (nranks-1) / nranks; + } + return (algbw_nic < algo_nvls_busbw) ? algbw_nic * (nranks-1) / nranks : algo_nvls_busbw * (nranks-1) / nranks; + } +} + +int lower_compare(char *coll_type, const char *lower_str) { + //return strcasecmp(coll_type, lower_str); + char temp_str[strlen(coll_type) + 1]; + + for (int i = 0; i < strlen(coll_type); i++) { + temp_str[i] = tolower((unsigned char)coll_type[i]); + } + temp_str[strlen(coll_type)] = '\0'; + + if (strcmp(temp_str, lower_str) == 0) { + return 0; + } + return 1; +} + +float calculateBusBw(CalculationParameters* params) { + float nvlink_bw; + if (params->bw_intra > 0.0) { + nvlink_bw = params->bw_intra; + } else { + nvlink_bw = getNvlinkBw(params->node_type); + } + float nic_bw; + if (params->bw_per_nic > 0.0) { + nic_bw = params->bw_per_nic; + } else { + nic_bw = getNicBw(params->nic_type); + } + float all_gather_bus_bw = 0.0; + + int gpus_per_node = params->gpus_pernode; + int nics_per_node = params->nics_pernode; + float real_nics_per_node = params->real_nics_pernode; + int node_count = params->node_count; + int nranks = node_count * gpus_per_node; + params->is_nvlink = false; //nvlink or nic + if (nvlink_bw <= 0 || nic_bw <= 0 || gpus_per_node < 1 || nics_per_node < 1 || node_count < 1) { + return -1; + } + + if (real_nics_per_node * nic_bw > nvlink_bw) { + if (params->cross_nic == 2) params->cross_nic = 1; + } else { + if (params->cross_nic == 2) params->cross_nic = 0; + } + + if (node_count == 1) { + all_gather_bus_bw = nvlink_bw; + } else { + if (gpus_per_node == 1) { + all_gather_bus_bw = nic_bw * real_nics_per_node; + } else { + all_gather_bus_bw = (nvlink_bw < nic_bw * real_nics_per_node) ? (params->is_nvlink = true, nvlink_bw) : nic_bw * real_nics_per_node; + if (params->cross_nic == 1) { + params->is_nvlink = false; + all_gather_bus_bw = (nvlink_bw * gpus_per_node / (gpus_per_node-1) < nic_bw * real_nics_per_node) ? 
(params->is_nvlink = true, nvlink_bw * gpus_per_node / (gpus_per_node-1) ): nic_bw * real_nics_per_node; + } + } + } + + float tree_bus_bw = 0.0; + float nvls_bus_bw = 0.0; + tree_bus_bw = calcTreeBusBw(gpus_per_node, node_count, nvlink_bw, nic_bw, real_nics_per_node, all_gather_bus_bw); + if (params->node_type == GPUType::H100 || params->node_type == GPUType::H20) { + nvls_bus_bw = calcNVLSBusBw(gpus_per_node, node_count, H100_NVLS_BW, nic_bw, real_nics_per_node); + } else if (params->node_type == GPUType::H800) { + nvls_bus_bw = calcNVLSBusBw(gpus_per_node, node_count, H800_NVLS_BW, nic_bw, real_nics_per_node); + } + + + if (lower_compare(params->coll_type, "allreduce") == 0) { + if (lower_compare(params->nccl_algo, "ring") == 0) { + return all_gather_bus_bw; + } else if (lower_compare(params->nccl_algo, "tree") == 0) { + return tree_bus_bw; + } else if (lower_compare(params->nccl_algo, "nvls") == 0 || lower_compare(params->nccl_algo, "nvlstree") == 0) { + if (lower_compare(params->nccl_algo, "nvls") == 0 && node_count > 1) params->nccl_algo = "nvlstree"; + if (lower_compare(params->nccl_algo, "nvlstree") == 0 && node_count == 1) params->nccl_algo = "nvls"; + if (gpus_per_node == 8) { + if (params->node_type == GPUType::H100 || params->node_type == GPUType::H800|| params->node_type == GPUType::H20) { + return nvls_bus_bw; + } else { + strcpy(info, "Warning: unsupported machine type for NVLS algorithm. Please choose from H20,H100,H800."); + retcode = 1; + return -1; + } + } else { + strcpy(info, "Warning: unsupported GPU count for NVLS algorithm. Please use 8 GPUs per node."); + retcode = 1; + return -1; + } + } else { + if (nvls_bus_bw > tree_bus_bw) { + if (all_gather_bus_bw > nvls_bus_bw) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + } else { + if (node_count > 1) { + params->nccl_algo = strdup("NVLSTree"); + } else { + params->nccl_algo = strdup("NVLS"); + } + return nvls_bus_bw; + } + } else { + if (all_gather_bus_bw > tree_bus_bw) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + } else { + params->nccl_algo = "Tree"; + return tree_bus_bw; + } + } + } + + + } else if (lower_compare(params->coll_type, "allgather") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "alltoall") == 0) { + params->nccl_algo = "none"; + if (node_count == 1) { + params->is_nvlink = true; + return nvlink_bw; + } + return nic_bw * real_nics_per_node / gpus_per_node * (nranks-1) / ((node_count-1)*gpus_per_node) ; + + } else if (lower_compare(params->coll_type, "broadcast") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "reducescatter") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "reduce") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else { + strcpy(info, "Warning: unknown collective type. 
Please choose from allreduce, allgather, alltoall, broadcast, reducescatter, reduce, multiallreduce."); + retcode = 1; + return -1; + } + return -1; +} +std::vector> readCSV(const std::string &filePath) { + std::ifstream file(filePath); + if (!file.is_open()) { + throw std::runtime_error("Failed to open file"); + } + std::vector> data; + std::string line; + bool isFirstLine = true; + + + while (std::getline(file, line)) { + + if (isFirstLine) { + isFirstLine = false; + continue; + } + + std::stringstream lineStream(line); + std::string cell; + std::vector rowData; + while (std::getline(lineStream, cell, ',')) { + cell.erase(0, cell.find_first_not_of(' ')); + cell.erase(cell.find_last_not_of(' ') + 1); + if (cell.empty()) { + cell = "1"; + } + rowData.push_back(cell); + } + + if (!rowData.empty()) { + data.push_back(rowData); + } + } + + return data; +} +void printData(const std::vector> &data) { + for (const auto &row : data) { + for (const auto &cell : row) { + std::cout << cell << " "; + } + std::cout << std::endl; + } +} + + +BusBwResult cal_busbw(GPUType node_type,float bw_intra,float bw_per_nic, float nics_pernode,int node_count,char* coll_type,int gpus_pernode,char* nic_type) { + BusBwResult result; + CalculationParameters params; + memset(¶ms, 0, sizeof(params)); + retcode = 0; + params.node_count = node_count; + + params.gpus_pernode = gpus_pernode; + params.nics_pernode = nics_pernode; + params.bw_per_nic = bw_per_nic; + params.bw_intra = bw_intra; + params.group_split_mask = 0; + params.nccl_algo = "ring"; + params.cross_nic = 2; + params.coll_type = coll_type; + params.node_type = node_type; + params.nic_type = nic_type; + // if (argc > 1 && strcmp(argv[1], "--help") == 0) { + // print_usage(argv[0]); + // return 1; + // } + // for (int i = 1; i < argc; i++){ + // parseParams(argc, argv, &i, ¶ms); + // } + params.real_nics_pernode = (float)params.nics_pernode; + + if (params.node_count < 1) { + strcpy(info, "Error: The number of nodes must be greater than 0."); + retcode = 1; + } + if (lower_compare(params.nccl_algo, "none")) { + if (lower_compare(params.nccl_algo, "ring") && lower_compare(params.nccl_algo, "tree") && lower_compare(params.nccl_algo, "nvls") && lower_compare(params.nccl_algo, "nvlstree")) { + strcpy(info, "Warning: the selected algorithm is not supported."); + } + } + + if (params.group_split_mask != 0 && params.group_split_mask != 1 && params.group_split_mask != 3 && params.group_split_mask != 7) { + strcpy(info, "Warning: the value of group_split_mask can only be 0, 1, 3, 7. 
Default is 0."); + params.group_split_mask = 0; + } else if (params.group_split_mask != 0 && params.gpus_pernode != 8) { + // 当前只支持8GPU机型的multi- 测试 + strcpy(info, "Warning: currently, only 8GPU nodes are supported for split_mask testing."); + params.group_split_mask = 0; + } + + if (lower_compare(params.coll_type, "allreduce") && lower_compare(params.nccl_algo, "none") && lower_compare(params.nccl_algo, "ring")) { + strcpy(info, "Warning: only allreduce can use other algorithms except ring."); + params.nccl_algo = "Ring"; + } + + if (lower_compare(params.coll_type, "multiallreduce") == 0 || lower_compare(params.coll_type, "multialltoall") == 0) { + params.nccl_algo = "Ring"; + params.cross_nic = 2; + if (params.gpus_pernode == 8) { + params.group_split_mask = 7; + } else { + params.real_nics_pernode = (float)params.nics_pernode / params.gpus_pernode; + params.gpus_pernode = 1; + } + params.coll_type += strlen("multi"); + } + + if (params.group_split_mask == 7) { + params.gpus_pernode = 1; + params.real_nics_pernode = (float)params.nics_pernode / 8.0; + } else if (params.group_split_mask == 3) { + params.gpus_pernode = 2; + params.real_nics_pernode = (float)params.nics_pernode / 4.0; + } else if (params.group_split_mask == 1) { + params.gpus_pernode = 4; + params.real_nics_pernode = (float)params.nics_pernode / 2.0; + } + + if (params.gpus_pernode * params.node_count == 1) { + strcpy(info, "Warning: collective communication requires the participation of at least two gpus."); + retcode = 1; + } + + float busBw = 0.0; + + if (retcode == 0){ + busBw = calculateBusBw(¶ms); + } + + if (params.node_count == 1) { + params.cross_nic = 0; + } + + if (retcode == 1) { + printf("{\"retcode\":%d, \"info\":\"%s\", \"theoretical_bus_bw\":\"-1\", \"nccl_algo\":\"none\", \"cross_nic\":2}\n", retcode, info); + } else { + printf("{\"retcode\":%d, \"info\":\"%s\", \"node_count\":%d, \"nic_type\":\"%s\", \"gpus_pernode\":%d, \"nics_pernode\":%.1f, \"coll_type\":\"%s\", \"cross_nic\":%d, \"nccl_algo\":\"%s\", \"theoretical_bus_bw_GBps\":%.3lf}\n", retcode, info, params.node_count, params.nic_type, params.gpus_pernode, params.real_nics_pernode, params.coll_type, params.cross_nic, params.nccl_algo, busBw); + } + result.busbw = busBw; + + result.is_nvlink = params.is_nvlink; + return result; +} +struct DataRow { + std::string size; + std::vector values; +}; +double interpolate(double size, double size1, double size2, double value1, double value2) { + return value1 + (value2 - value1) * (size - size1) / (size2 - size1); +} +float getValue(double datasize, int _temp_nnode, const std::vector>& data) { + int colIndex = 0; + + if (_temp_nnode == 1) { + colIndex = 1; + } else if (_temp_nnode == 2) { + colIndex = 2; + } else if (_temp_nnode == 4) { + colIndex = 3; + } else if (_temp_nnode == 8) { + colIndex = 4; + } else if (_temp_nnode == 16) { + colIndex = 5; + } else if (_temp_nnode == 32) { + colIndex = 6; + } else if (_temp_nnode == 64) { + colIndex = 7; + } else if (_temp_nnode == 128) { + colIndex = 8; + } else if (_temp_nnode == 9) { + colIndex = 9; + } + else { + colIndex = 5; + } + if (datasize == 0) { + return 1.0; + } + double minSize = std::stod(data.front()[0]); + if (datasize < minSize) { + return std::stod(data.front()[colIndex])/std::stod(data.back()[colIndex]); + } + + for (size_t i = 0; i < data.size() - 1; ++i) { + double size1 = std::stod(data[i][0]); + double size2 = std::stod(data[i+1][0]); + if (datasize >= size1 && datasize <= size2) { + double value1 = std::stod(data[i][colIndex]); + double 
value2 = std::stod(data[i+1][colIndex]); + return interpolate(datasize, size1, size2, value1, value2)/std::stod(data.back()[colIndex]); + } + } + throw std::runtime_error("Data size out of range"); +} + +float cal_ratio(std::vector> nic_ratio_data,std::vector> nvlink_ratio_data,std::vector> ata_ratio_data,uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,char* group_type,char* coll_type,bool is_nvlink){ + if ((strcmp(coll_type, "allgather") == 0 || strcmp(coll_type, "reducescatter") == 0 ) && strcmp(group_type, "tp") == 0 ){ + auto data = is_nvlink ? nvlink_ratio_data : nic_ratio_data; + int _temp_nnode = (tp_size < gpus_per_server) ? 1 : tp_size / gpus_per_server ; + return getValue(data_size, _temp_nnode, data); + } else if (strcmp(coll_type, "alltoall") == 0 && strcmp(group_type, "ep") == 0){ + auto data = ata_ratio_data; + if(tp_size * nranks <= gpus_per_server){ + return getValue(data_size, 1, data); + }else if(tp_size >= gpus_per_server){ //multi + return getValue(data_size, 9, data); + } else { + int _temp_nnode = (tp_size * nranks) / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } else if (strcmp(coll_type, "alltoall") == 0 && strcmp(group_type, "tp") == 0){ + auto data = ata_ratio_data; + if (tp_size <= gpus_per_server){ + return getValue(data_size, 1, data); + } else { + int _temp_nnode = tp_size / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } + else if(strcmp(group_type, "dp") == 0 || strcmp(group_type, "dp_ep") == 0){ + return 1; + }else{ + return 1; + } +} \ No newline at end of file diff --git a/astra-sim-alibabacloud/astra-sim/system/calbusbw.h b/astra-sim-alibabacloud/astra-sim/system/calbusbw.h new file mode 100644 index 00000000..a6698227 --- /dev/null +++ b/astra-sim-alibabacloud/astra-sim/system/calbusbw.h @@ -0,0 +1,50 @@ +#ifndef CALBUSBW_H +#define CALBUSBW_H +#include "astra-sim/system/AstraParamParse.hh" +#define SM80_NVLINK_BW 20.0 +#define SM90_NVLINK_BW 20.6 +#define H100_NVLINKS 18 +#define H800_NVLINKS 8 +#define A100_NVLINKS 12 +#define A800_NVLINKS 8 + +#define CX6_BW 23.5 // 25 +#define CX7_BW 48.5 // 50 +#define BF3_BW 48.5 // 50 + +#define H100_NVLS_BW 475.0 +#define H800_NVLS_BW 215.0 + +#define H800_PCIE_BW 51.2 // 64*0.8 +#define H100_PCIE_BW 51.2 // 64*0.8 +#define A100_PCIE_BW 25.6 // 32*0.8 +#define A800_PCIE_BW 25.6 // 32*0.8 +#define NIC_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv" +#define NVLINK_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv" +#define ATA_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv" +typedef struct { + GPUType node_type; + int node_count; + char* nic_type; + char* coll_type; + int cross_nic; + char* nccl_algo; + int gpus_pernode; + float nics_pernode; + float bw_per_nic; + float bw_intra; + int group_split_mask; + float real_nics_pernode; + bool is_nvlink; +} CalculationParameters; + +typedef struct { + float busbw; + int is_nvlink; +} BusBwResult; + +BusBwResult cal_busbw(GPUType node_type, float bw_intra, float bw_per_nic, float nics_pernode, int node_count, char* coll_type, int gpus_pernode, char* nic_type); +float cal_ratio(std::vector> nic_ratio_data,std::vector> nvlink_ratio_data,std::vector> ata_ratio_data,uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,char* group_type,char* coll_type,bool is_nvlink); +std::vector> readCSV(const std::string &filePath); +float getValue(double datasize, int _temp_nnode, const std::vector>& data); +#endif // CALBUSBW_H \ No newline at end of file 
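The new `cal_busbw()` entry point declared above replaces the per-group bus-bandwidth values that used to be read from `busbw.yaml`: given the GPU and NIC types (or explicit `-nv`/`-nic` overrides), the server topology, and a collective type, it returns a theoretical bus bandwidth plus a flag indicating whether NVLink or the NICs are the bottleneck. The following is a minimal sketch of how a caller might invoke it; it is not part of the patch, and the 4-node H100/CX7 values are illustrative assumptions only.

```cpp
// Illustrative only: exercising the new bus-bandwidth model directly.
// Assumes this is compiled inside the repo so the header path resolves.
#include <cstdio>
#include "astra-sim/system/calbusbw.h"

int main() {
  // Passing -1 for bw_intra / bw_per_nic mirrors the NetWorkParam defaults
  // (nvlink_bw = bw_per_nic = -1.0), so the model falls back to the tabulated
  // per-GPU NVLink bandwidth and the named NIC type.
  BusBwResult r = cal_busbw(GPUType::H100,
                            /*bw_intra=*/-1.0f,
                            /*bw_per_nic=*/-1.0f,
                            /*nics_pernode=*/8.0f,
                            /*node_count=*/4,
                            const_cast<char*>("allreduce"),
                            /*gpus_pernode=*/8,
                            const_cast<char*>("cx7"));
  std::printf("theoretical busbw = %.3f GB/s, bottleneck = %s\n",
              r.busbw, r.is_nvlink ? "NVLink" : "NIC");
  return 0;
}
```

In `Layer::compute_time` (next hunk) the same call is made with a node count and GPUs-per-server derived from the TP/EP/DP group sizes, and the returned bandwidth is then scaled by the message-size efficiency ratios loaded from the CSV tables added at the end of this patch.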
diff --git a/astra-sim-alibabacloud/astra-sim/workload/Layer.cc b/astra-sim-alibabacloud/astra-sim/workload/Layer.cc old mode 100755 new mode 100644 index d4dd1c7e..ce2363de --- a/astra-sim-alibabacloud/astra-sim/workload/Layer.cc +++ b/astra-sim-alibabacloud/astra-sim/workload/Layer.cc @@ -8,7 +8,9 @@ LICENSE file in the root directory of this source tree. #include "astra-sim/system/IntData.hh" #include "astra-sim/system/MockNcclLog.h" #include "astra-sim/system/AstraParamParse.hh" - +// #ifdef ANALYTI +#include "astra-sim/system/calbusbw.h" +// #endif #ifdef NS3_MPI #include "ns3/mpi-interface.h" @@ -499,7 +501,7 @@ LayerData Layer::report( data = "total exposed comm," + to_string(total_exposed) + ",total comp," + to_string(total_compute) + ",total time," + to_string(total_time); EndToEnd->write_line(data); - Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ ); + Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp_overlap_ratio) * 1e9) / FREQ ); Expose_PP_time *= (1-param->net_work_param.pp_overlap_ratio) ; //pp bubble time pre_bubble_time *= static_cast(PP_size - 1) / (GA * vpp); @@ -729,7 +731,7 @@ LayerData Layer::report( total_exposed = (((double)Sys::boostedTick()) / FREQ ) - total_compute; } //pp commtime - Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ ); + Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp_overlap_ratio) * 1e9) / FREQ ); Expose_PP_time *= (1-param->net_work_param.pp_overlap_ratio) ; //pp bubble time pre_bubble_time *= static_cast(PP_size - 1) / (GA * vpp); @@ -852,6 +854,67 @@ static std::pair binarySearch(const std::vector& arr, long targe return std::make_pair(leftIndex, rightIndex); } +char* comtype_to_coll(ComType comtype) { + switch (comtype) { + case ComType::None: + return "none"; + case ComType::Reduce_Scatter: + return "reducescatter"; + case ComType::All_Gather: + return "allgather"; + case ComType::All_Reduce: + return "allreduce"; + case ComType::All_to_All: + return "alltoall"; + case ComType::All_Reduce_All_to_All: + return "all_reduce_all_to_all"; + case ComType::All_Reduce_NVLS: + return "all_reduce_nvls"; + default: + return "unknown"; + } +} +float Layer::cal_ratio( + uint64_t data_size, + int nranks, + int tp_size, + uint32_t gpus_per_server, + MockNccl::GroupType group_type, + char* coll_type, + bool is_nvlink){ + UserParam* param = UserParam::getInstance(); + auto nic_ratio_data = generator->nic_ratio_data; + auto nvlink_ratio_data = generator->nvlink_ratio_data; + auto ata_ratio_data = generator->ata_ratio_data; + if ((strcmp(coll_type, "allgather") == 0 || strcmp(coll_type, "reducescatter") == 0 ) && group_type == MockNccl::GroupType::TP){ + auto data = is_nvlink ? nvlink_ratio_data : nic_ratio_data; + int _temp_nnode = (tp_size < gpus_per_server) ? 
1 : tp_size / gpus_per_server ; + return getValue(data_size, _temp_nnode, data); + } else if (strcmp(coll_type, "alltoall") == 0 && group_type == MockNccl::GroupType::EP){ + auto data = ata_ratio_data; + if(tp_size * nranks <= gpus_per_server){ + return getValue(data_size, 1, data); + }else if(tp_size >= gpus_per_server){ //multi + return getValue(data_size, 9, data); + } else { + int _temp_nnode = (tp_size * nranks) / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } else if (strcmp(coll_type, "alltoall") == 0 && group_type == MockNccl::GroupType::TP){ + auto data = ata_ratio_data; + if (tp_size <= gpus_per_server){ + return getValue(data_size, 1, data); + } else { + int _temp_nnode = tp_size / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } + else if(group_type == MockNccl::GroupType::DP || group_type == MockNccl::GroupType::DP_EP){ + return 1; + }else{ + return 1; + } +} Tick Layer::compute_time( ComType comtype, int tp_size, @@ -866,102 +929,86 @@ Tick Layer::compute_time( return 0; } - bool DP_comm_inside = false; - bool TP_comm_inside = false; - bool EP_comm_inside = false; + int n_ranks; int nnics; uint32_t gpus_per_server = param->net_work_param.gpus_per_server; GPUType gpu_type = param->net_work_param.gpu_type; - float tp_ar = param->net_work_param.tp_ar; - float tp_ag = param->net_work_param.tp_ag; - float tp_ata = param->net_work_param.tp_ata; - float ep_ata = param->net_work_param.ep_ata; - float dp_ag = param->net_work_param.dp_ag; - float ep_ag = param->net_work_param.ep_ag; - float dp_ar = param->net_work_param.dp_ar; - float ep_ar = param->net_work_param.ep_ar; - if (group_type == MockNccl::GroupType::TP || group_type == MockNccl::GroupType::EP) { - n_ranks = tp_size; - if (n_ranks <= gpus_per_server) - TP_comm_inside = true; - } else if ( - group_type == MockNccl::GroupType::DP || - group_type == MockNccl::GroupType::EP || group_type == MockNccl::GroupType::DP_EP) { - n_ranks = nranks; - nnics = gpus_per_server / tp_size; - if (all_gpus == gpus_per_server && tp_size <= gpus_per_server) - DP_comm_inside = true; - - } - if (TP_comm_inside || DP_comm_inside) { - if (comtype == ComType::All_Reduce) { + float nvlink_bw = param->net_work_param.nvlink_bw; + float bw_per_nic = param->net_work_param.bw_per_nic; + uint32_t nics_per_server = param->net_work_param.nics_per_server; + char* nic_type = param->net_work_param.nic_type; + char* coll_type = comtype_to_coll(comtype); + float bw_ratio = 1.0; + BusBwResult result; - comp_time = data_size * GBps / tp_ar * 1e9 * 2 * //tp2 ep8 164.8 tp16 218 - (nranks - 1) / (nranks / 1.0); + if (1 < data_size && data_size < 1048576){ + if(nranks == 2) comp_time = 10000; + if(nranks == 4) comp_time = 12000; + if(nranks == 8) comp_time = 15000; + if(nranks == 16) comp_time = 66000; + if(nranks == 32) comp_time = 135000; + if(nranks == 64) comp_time = 200000; + if(nranks == 128) comp_time = 320000; + return comp_time; + } + if (group_type == MockNccl::GroupType::TP ){ + //TP_comm_inside + if(tp_size <= gpus_per_server){ + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,1,coll_type,tp_size,nic_type); + }else{ + int _node_count = tp_size / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,_node_count,coll_type,gpus_per_server,nic_type); } - else if (group_type == MockNccl::GroupType::TP && ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter )) { - comp_time = data_size * GBps / tp_ag * 1e9 * - (nranks - 1) / (nranks / 1.0); - } 
else if (group_type == MockNccl::GroupType::TP && ( - comtype == ComType::All_to_All)) { - comp_time = data_size * GBps / tp_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - }else if (group_type == MockNccl::GroupType::EP && - comtype == ComType::All_to_All) { + }else if (group_type == MockNccl::GroupType::EP && nranks > 1) + { + if(tp_size * nranks <= gpus_per_server){ + uint32_t _temp_gpus_per_server = gpus_per_server / tp_size; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,1,coll_type,_temp_gpus_per_server,nic_type); - comp_time = data_size * GBps / ep_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - - }else { - comp_time = 0; - } - } else if (!TP_comm_inside && group_type == MockNccl::GroupType::TP) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / - tp_ar * 1e9 * 2 * - (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter) { - comp_time = data_size * GBps / - tp_ag * 1e9 * - (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_to_All) { - comp_time = data_size * GBps / - tp_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; - } - } else if ( - !DP_comm_inside && - (group_type == MockNccl::GroupType::DP)) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / dp_ar * 1e9 * - 2 * (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter || comtype == ComType::All_to_All) { - comp_time = data_size * GBps / dp_ag * 1e9 * //tp2 ep8 48.5 - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; + }else{ + int _node_count = (tp_size * nranks) / gpus_per_server; + uint32_t _temp_gpus_per_server = (gpus_per_server / tp_size > 1) ? (gpus_per_server / tp_size) : 1; + float _temp_nics_per_server = (tp_size > gpus_per_server) ? 
(nics_per_server / gpus_per_server) : (nics_per_server / tp_size); + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,_node_count,coll_type,_temp_gpus_per_server,nic_type); + } + }else if(group_type == MockNccl::GroupType::DP && nranks > 1){ + if(tp_size <= gpus_per_server){ + uint32_t _temp_gpus_per_server = gpus_per_server / tp_size; + float _temp_nics_per_server = nics_per_server / tp_size; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,_temp_gpus_per_server,nic_type); + }else{ + float _temp_nics_per_server = nics_per_server / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,1,nic_type); } - }else if ( - !DP_comm_inside && - ( group_type == MockNccl::GroupType::DP_EP)) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / ep_ar* 1e9 * - 2 * (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter || comtype == ComType::All_to_All) { - comp_time = data_size * GBps / ep_ag * 1e9 * //tp2 ep8 48.5 - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; + }else if(group_type == MockNccl::GroupType::DP_EP && nranks > 1){ + if(tp_size * ep_size <= gpus_per_server){ + float _temp_nics_per_server = nics_per_server / (tp_size * ep_size); + uint32_t _temp_gpus_per_server = gpus_per_server / (tp_size * ep_size); + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,_temp_gpus_per_server,nic_type); + + }else{ + float _temp_nics_per_server = nics_per_server / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,1,nic_type); } + }else{ + + comp_time = 0; + return comp_time; } + + bw_ratio = cal_ratio(data_size,nranks,tp_size,gpus_per_server,group_type,coll_type,result.is_nvlink); + cout<<"Communication Type: "<& involved_dimensions); + float cal_ratio(uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,MockNccl::GroupType group_type,char* coll_type,bool is_nvlink); std::pair compute_busbw(ComType comtype, int nranks,uint64_t data_size,Tick total_comm); Tick compute_time(ComType comtype, int tp_size,int nranks , uint64_t data_size, MockNccl::GroupType group_type, int all_gpus,int ep_size); }; diff --git a/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv new file mode 100644 index 00000000..2866883a --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.44,0.52,0.67,0.64,0.62,,,,0.86,0.86,0.77, +33554432,0.51,0.63,0.68,0.67,0.66,,,,0.92,0.9,0.8, +67108864,0.6,0.72,0.83,0.82,0.66,,,,0.94,0.91,0.83, +134217728,0.65,0.76,0.9,0.85,0.72,,,,0.94,0.92,0.87, +268435456,0.7,0.79,0.92,0.87,0.75,,,,0.94,0.93,0.91,0.9 +536870912,0.71,0.8,0.94,0.89,0.81,,,,0.95,0.94,0.93,0.95 +1073741824,0.74,0.81,0.94,0.92,0.85,,,,0.96,0.95,0.94,0.95 +2147483648,0.76,0.81,0.95,0.93,0.88,,,,0.96,0.96,0.95,0.95 +4294967296,0.77,0.8,0.95,0.94,0.89,,,,0.96,0.96,0.95,0.95 +8589934592,0.78,0.8,0.95,0.94,0.9,,,,0.96,0.96,0.96,0.95 +17179869184,0.78,0.8,0.95,0.94,0.9,,,,0.96,0.96,0.96,0.95 \ No newline at end of file diff --git a/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv new file mode 100644 
index 00000000..14951e6e --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.45,0.32,0.24,0.13,0.05,0.044,0.021,0.0087,0.86,0.86,0.77, +33554432,0.55,0.47,0.38,0.24,0.11,0.052,0.042,0.021225,0.92,0.9,0.8, +67108864,0.65,0.5,0.53,0.39,0.21,0.106,0.051,0.04405,0.94,0.91,0.83, +134217728,0.69,0.67,0.67,0.54,0.38,0.205,0.107,0.0533,0.94,0.92,0.87, +268435456,0.73,0.85,0.77,0.69,0.57,0.387,0.209,0.1027,0.94,0.93,0.91,0.9 +536870912,0.76,0.89,0.85,0.78,0.73,0.562,0.384,0.1902,0.95,0.94,0.93,0.95 +1073741824,0.77,0.92,0.93,0.9,0.76,0.74,0.563,0.314575,0.96,0.95,0.94,0.95 +2147483648,0.78,0.94,0.94,0.94,0.92,0.758,0.745,0.553525,0.96,0.96,0.95,0.95 +4294967296,0.79,0.95,0.95,0.95,0.95,0.923,0.755,0.740175,0.96,0.96,0.95,0.95 +8589934592,0.8,0.95,0.96,0.96,0.96,0.953,0.914,0.7648,0.96,0.96,0.96,0.95 +17179869184,0.81,0.96,0.96,0.96,0.96,0.957,0.958,0.919175,0.96,0.96,0.96,0.95 \ No newline at end of file diff --git a/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv new file mode 100644 index 00000000..e4a8eaaf --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.45,0.55,0.34,0.16,0.08,,,,0.86,0.86,0.77, +33554432,0.55,0.68,0.6,0.34,0.16,,,,0.92,0.9,0.8, +67108864,0.65,0.74,0.71,0.62,0.33,,,,0.94,0.91,0.83, +134217728,0.69,0.8,0.74,0.7,0.65,,,,0.94,0.92,0.87, +268435456,0.73,0.84,0.79,0.78,0.72,,,,0.94,0.93,0.91,0.9 +536870912,0.76,0.86,0.86,0.82,0.75,,,,0.95,0.94,0.93,0.95 +1073741824,0.77,0.87,0.87,0.86,0.83,,,,0.96,0.95,0.94,0.95 +2147483648,0.78,0.87,0.87,0.87,0.86,,,,0.96,0.96,0.95,0.95 +4294967296,0.79,0.88,0.88,0.88,0.87,,,,0.96,0.96,0.95,0.95 +8589934592,0.8,0.88,0.88,0.88,0.88,,,,0.96,0.96,0.96,0.95 +17179869184,0.81,0.88,0.88,0.88,0.88,,,,0.96,0.96,0.96,0.95 \ No newline at end of file
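The three ratio tables above feed `getValue()`, which selects a column from the node count, linearly interpolates between the two message-size rows that bracket the request, and normalizes by the column's largest-size (asymptotic) entry; `Layer::compute_time` multiplies the theoretical bus bandwidth from `cal_busbw()` by this factor before converting bytes to time. Below is a hedged sketch of that lookup, not part of the patch; the 50 MiB / 2-node example and running from the repository root are assumptions.

```cpp
// Illustrative only: querying the new nic_ratio.csv the way Layer::compute_time
// does for an inter-node allgather/reducescatter in a TP group.
// Run from the repository root so the relative NIC_RATIO_PATH resolves.
#include <cstdio>
#include "astra-sim/system/calbusbw.h"

int main() {
  auto nic = readCSV(NIC_RATIO_PATH);
  // 50 MiB falls between the 32 MiB (0.47) and 64 MiB (0.50) rows of the
  // 2-node column, so getValue interpolates to ~0.487 and divides by the
  // 16 GiB entry (0.96), giving an efficiency factor of ~0.507.
  float ratio = getValue(50.0 * 1024 * 1024, /*_temp_nnode=*/2, nic);
  std::printf("efficiency ratio ~= %.3f\n", ratio);
  // The modeled communication time then scales as
  //   data_size * (nranks - 1) / nranks / (cal_busbw(...).busbw * ratio).
  return 0;
}
```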