diff --git a/README.md b/README.md index e96d12c2..a90524f1 100755 --- a/README.md +++ b/README.md @@ -140,6 +140,11 @@ $ ./scripts/build.sh -c ns3 $ ./bin/SimAI_analytical -w example/workload_analytical.txt -g 9216 -g_p_s 8 -r test- -busbw example/busbw.yaml ``` +For calculating bus bandwidth automatically, please try the following command: +```bash +$ ./bin/SimAI_analytical -w ./example/workload_analytical.txt -g 9216 -nv 360 -nic 48.5 -n_p_s 8 -g_p_s 8 -r example- +``` + ## Use SimAI-Simulation ```bash diff --git a/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc b/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc index 6d7a60d1..48fa0da1 100644 --- a/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc +++ b/astra-sim-alibabacloud/astra-sim/network_frontend/analytical/AnalyticalAstra.cc @@ -67,8 +67,8 @@ struct user_param { int main(int argc,char *argv[]) { UserParam* param = UserParam::getInstance(); - if (param->parseArg(argc,argv)) { - std::cerr << "-h, --help Help message" << std::endl; + if (param->parse(argc,argv)) { + std::cerr << "-h, --help Help message" << std::endl; return -1; } param->mode = ModeType::ANALYTICAL; diff --git a/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh b/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh index 72fb4dee..2cb0d3bb 100644 --- a/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh +++ b/astra-sim-alibabacloud/astra-sim/system/AstraParamParse.hh @@ -32,9 +32,16 @@ #include "Common.hh" #define BUSBW_PATH "" using namespace std; -#include + enum class ModeType { NONE, ASTRA_SIM, MOCKNCCL, ANALYTICAL }; +#include +#include +#include +#include +#include + + struct NetWorkParam{ uint32_t node_num; uint32_t switch_num; @@ -43,29 +50,17 @@ struct NetWorkParam{ uint32_t nvswitch_num; uint32_t gpus_per_server; uint32_t nics_per_server; - uint32_t nvlink_bw; - uint32_t nic_bw; - GPUType gpu_type; - float tp_ar = -1.0f; - float tp_ag = -1.0f; - float tp_rs = -1.0f; - float tp_ata = -1.0f; - float dp_ar = -1.0f; - float dp_ag = -1.0f; - float dp_rs = -1.0f; - float dp_ata = -1.0f; - float ep_ar = -1.0f; - float ep_ag = -1.0f; - float ep_rs = -1.0f; - float ep_ata = -1.0f; - float pp = -1.0f; + float nvlink_bw = -1.0; + float bw_per_nic = -1.0; + char* nic_type = "cx7"; + bool visual = 0; float dp_overlap_ratio = 0; float tp_overlap_ratio = 0; float ep_overlap_ratio = 0; float pp_overlap_ratio = 1; - std::vector NVswitchs; - std::vector> all_gpus; - int visual = 0; + GPUType gpu_type; + std::vectorNVswitchs; + std::vector>all_gpus; }; class UserParam { @@ -82,219 +77,170 @@ private: } public: - int thread; - std::vector gpus; - string workload; - string res = "None"; - int comm_scale; - ModeType mode; - NetWorkParam net_work_param; + int thread; + std::vector gpus; + std::string workload; + std::string res = "None"; + std::string res_folder = "None"; + int comm_scale; + ModeType mode; + NetWorkParam net_work_param; - static UserParam* getInstance(){ - std::lock_guard lock(mtx); - if(instance == nullptr){ - instance = new UserParam(); - } - return instance; - } - - void parseYaml(NetWorkParam& params, const std::string& filename) { - std::ifstream file(BUSBW_PATH + filename); - if (!file) { - std::cerr << "Unable to open file: " << filename << std::endl; - exit(-1); - } - std::string line; - std::string currentSection; - std::getline(file, line); - while (std::getline(file, line)) { - // Remove whitespace - - line.erase(0, line.find_first_not_of(' 
')); - line.erase(line.find_last_not_of(' ') + 1); - if (line.empty() || line[0] == '#') continue; - if (line.back() == ':') { - currentSection = line.substr(0, line.size() - 1); - } else { - std::istringstream ss(line); - std::string key, valueStr; - if (std::getline(ss, key, ':') && ss >> valueStr) { - key.erase(key.find_last_not_of(' ') + 1); - - // Remove part after comma - auto commaPos = key.find(','); - if (commaPos != std::string::npos) { - key = key.substr(0, commaPos); - } + static UserParam* getInstance(){ + std::lock_guard lock(mtx); + if(instance == nullptr){ + instance = new UserParam(); + } + return instance; + } - if (valueStr != "null") { - float value = std::stof(valueStr); - - if (currentSection == "TP") { - if (key == "allreduce") params.tp_ar = value; - else if (key == "allgather") params.tp_ag = value; - else if (key == "reducescatter") params.tp_rs = value; - else if (key == "alltoall") params.tp_ata = value; - } else if (currentSection == "DP") { - if (key == "allreduce") params.dp_ar = value; - else if (key == "allgather") params.dp_ag = value; - else if (key == "reducescatter") params.dp_rs = value; - else if (key == "alltoall") params.dp_ata = value; - } else if (currentSection == "EP") { - if (key == "allreduce") params.ep_ar = value; - else if (key == "allgather") params.ep_ag = value; - else if (key == "reducescatter") params.ep_rs = value; - else if (key == "alltoall") params.ep_ata = value; - } else if (currentSection == "PP") { - if (key == "busbw") params.pp = value; - } - } - } +int parse(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "-h" || arg == "--help") { + std::cout << "-w, --workload Workloads, default none" << std::endl; + std::cout << "-g, --gpus Number of GPUs, default 1" << std::endl; + std::cout << "-g_p_s, --gpus-per-server GPUs per server" << std::endl; + std::cout << "-r, --result Output results path" << std::endl; + std::cout << "-nv, --nvlink Nvlink" << std::endl; + std::cout << "-nic, --nic_busbw NIC busbw" << std::endl; + std::cout << "-n_p_s, --bus-bandwidth Bus bandwidth file" << std::endl; + std::cout << "-nic_t, --nic_type NIC type(cx7,bf3),choose when disable nic " << std::endl; + std::cout << "-g_type, --gpu_type GPU type(A100,H100),choose when disable nvlink " << std::endl; + std::cout << "-v, --visual Enable visual output" << std::endl; + std::cout << "-dp_o, --dp_overlap dp overlap ratio(Default 0)" << std::endl; + std::cout << "-ep_o, --ep_overlap ep overlap ratio(Default 0)" << std::endl; + std::cout << "-tp_o, --tp_overlap tp overlap ratio(Default 0)" << std::endl; + std::cout << "-pp_o, --pp_overlap pp overlap ratio(Default 1)" << std::endl; + return 1; + } else if (arg == "-w" || arg == "--workload") { + if (++i < argc) this->workload = argv[i]; + } else if (arg == "-g" || arg == "--gpus") { + if (++i < argc) this->gpus.push_back(std::stoi(argv[i])); + } else if (arg == "-r" || arg == "--result") { + if (++i < argc) this->res = argv[i]; + } else if (arg == "-r_f" || arg == "--result_folder") { + if (++i < argc) this->res_folder = argv[i]; + } else if (arg == "-g_p_s" || arg == "--gpus-per-server") { + if (++i < argc) this->net_work_param.gpus_per_server = std::stoi(argv[i]); + } else if (arg == "-nv" || arg == "--nvlink") { + if (++i < argc) this->net_work_param.nvlink_bw = std::stof(argv[i]); + } else if (arg == "-nic"|| arg == "--nic_busbw") { + if (++i < argc) this->net_work_param.bw_per_nic = std::stof(argv[i]); + } else if (arg == "-n_p_s" || arg == 
"--nic_per_server") { + if (++i < argc) this->net_work_param.nics_per_server = std::stoi(argv[i]); + } else if (arg == "-nic_t" || arg == "--nic_type") { + if (++i < argc) this->net_work_param.nic_type = argv[i]; + } else if (arg == "-g_type" || arg == "--gpu_type") { + if (++i < argc) { + std::string gpu_type = argv[i]; + if (gpu_type == "A100" || gpu_type == "a100") this->net_work_param.gpu_type = GPUType::A100; + else if (gpu_type == "A800" || gpu_type == "a800" ) this->net_work_param.gpu_type = GPUType::A800; + else if (gpu_type == "H100" || gpu_type == "h100") this->net_work_param.gpu_type = GPUType::H100; + else if (gpu_type == "H800" || gpu_type == "h800") this->net_work_param.gpu_type = GPUType::H800; + else if (gpu_type == "H20" || gpu_type == "h20") this->net_work_param.gpu_type = GPUType::H20; + else this->net_work_param.gpu_type = GPUType::NONE; } + }else if (arg == "-v" || arg == "--visual") { + if (++i < argc) this->net_work_param.visual = std::stoi(argv[i]); + }else if (arg == "--dp_overlap" || arg == "-dp_o") { + if (++i < argc) this->net_work_param.dp_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--tp_overlap" || arg == "-tp_o") { + if (++i < argc) this->net_work_param.tp_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--ep_overlap" || arg == "-ep_o") { + if (++i < argc) this->net_work_param.ep_overlap_ratio = std::stof(argv[i]); + }else if (arg == "--pp_overlap" || arg == "-pp_o") { + if (++i < argc) this->net_work_param.pp_overlap_ratio = std::stof(argv[i]); + } + else { + return 1; } } - void printHelp() const { - std::cout << " ____ _ _ ___ _ _ _ _ _ \n" - << "/ ___|(_)_ __ ___ / \\ |_ _| / \\ _ __ __ _| |_ _| |_(_) ___ __ _| |\n" - << "\\___ \\| | '_ ' _ \\ / _ \\ | |_____ / _ \\ | '_ \\ / _' | | | | | __| |/ __/ _' | |\n" - << " ___) | | | | | | |/ ___ \\ | |_____/ ___ \\| | | | (_| | | |_| | |_| | (_| (_| | |\n" - << "|____/|_|_| |_| |_/_/ \\_\\___| /_/ \\_\\_| |_|\\__,_|_|\\__, |\\__|_|\\___\\__,_|_|\n" - << " |___/ \n"; - std::cout << "-w, --workload Workloads, must set" << std::endl; - std::cout << "-g, --gpus Number of GPUs, default 1" << std::endl; - std::cout << "-g_p_s, --gpus-per-server GPUs per server" << std::endl; - std::cout << "-r, --result Output results path, default: ./results/" << std::endl; - std::cout << "-busbw, --bus-bandwidth Bus bandwidth file, must set" << std::endl; - std::cout << "-v, --visual Enable visual output (Default disable)" << std::endl; - std::cout << "-dp_o, --dp-overlap-ratio DP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-ep_o, --ep-overlap-ratio EP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-tp_o, --tp-overlap-ratio TP overlap ratio [float: 0.0-1.0] (Default: 0.0)" << std::endl; - std::cout << "-pp_o, --pp-overlap-ratio PP overlap ratio [float: 0.0-1.0] (Default: 1.0)" << std::endl; - } - - int printError(const std::string& arg) const { - std::cerr << "Error: Missing value for argument '" << arg << "'." << std::endl; - return 1; - } - - int printUnknownOption(const std::string& arg) const { - std::cerr << "Error: Unknown option '" << arg << "'." 
<< std::endl; - return 1; + if (!this->gpus.empty()) { + this->net_work_param.nvswitch_num = this->gpus[0] / this->net_work_param.gpus_per_server; + this->net_work_param.switch_num = 120 + this->net_work_param.gpus_per_server; + this->net_work_param.node_num = this->net_work_param.nvswitch_num + this->net_work_param.switch_num + this->gpus[0]; } - int parseArg(int argc, char *argv[]) { - for (int i = 1; i < argc; ++i) { - std::string arg = argv[i]; - if (arg == "-h" || arg == "--help") { - printHelp(); - return 1; - } else if (arg == "-w" || arg == "--workload") { - if (++i < argc) this->workload = argv[i]; - else return printError(arg); - } else if (arg == "-g" || arg == "--gpus") { - if (++i < argc) this->gpus.push_back(std::stoi(argv[i])); - else return printError(arg); - } else if (arg == "-r" || arg == "--result") { - if (++i < argc) this->res = argv[i]; - else return printError(arg); - } else if (arg == "-g_p_s" || arg == "--gpus-per-server") { - if (++i < argc) this->net_work_param.gpus_per_server = std::stoi(argv[i]); - else return printError(arg); - } else if (arg == "-busbw" || arg == "--bus-bandwidth") { - if (++i < argc) parseYaml(this->net_work_param,argv[i]); - else return printError(arg); - } else if (arg == "--dp-overlap-ratio" || arg == "-dp_o") { - if (++i < argc) this->net_work_param.dp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--tp-overlap-ratio" || arg == "-tp_o") { - if (++i < argc) this->net_work_param.tp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--ep-overlap-ratio" || arg == "-ep_o") { - if (++i < argc) this->net_work_param.ep_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "--pp-overlap-ratio" || arg == "-pp_o") { - if (++i < argc) this->net_work_param.pp_overlap_ratio = std::stof(argv[i]); - else return printError(arg); - } else if (arg == "-v" || arg == "--visual") { - this->net_work_param.visual = 1; - } - else { - return printUnknownOption(arg); - } + if (this->res == "None" ){ + std::string full_path = this->workload; + std::string model_info = full_path; + size_t last_slash_pos = full_path.find_last_of('/'); + if (last_slash_pos != std::string::npos) { + model_info = full_path.substr(last_slash_pos + 1); } + std::string model_name; + int world_size = 0, tp = 0, pp = 0, ep = 0, gbs = 0, mbs = 0, seq = 0; - if (!this->gpus.empty()) { - this->net_work_param.nvswitch_num = this->gpus[0] / this->net_work_param.gpus_per_server; - this->net_work_param.switch_num = 120 + this->net_work_param.gpus_per_server; - this->net_work_param.node_num = this->net_work_param.nvswitch_num + this->net_work_param.switch_num + this->gpus[0]; + + size_t world_size_pos = model_info.find("world_size"); + if (world_size_pos != std::string::npos) { + model_name = model_info.substr(0, world_size_pos - 1); } - if (this->res == "None" || this->res.back() == '/'){ - std::string full_path = this->workload; - std::string model_info = full_path; - size_t last_slash_pos = full_path.find_last_of('/'); - if (last_slash_pos != std::string::npos) { - model_info = full_path.substr(last_slash_pos + 1); - } - std::string model_name; - int world_size = 0, tp = 0, pp = 0, ep = 0, gbs = 0, mbs = 0, seq = 0; - size_t world_size_pos = model_info.find("world_size"); - if (world_size_pos != std::string::npos) { - model_name = model_info.substr(0, world_size_pos - 1); + + std::regex param_regex(R"((world_size|tp|pp|ep|gbs|mbs|seq)(\d+))"); + std::smatch matches; + + std::string 
params = model_info; + while (std::regex_search(params, matches, param_regex)) { + std::string param_name = matches[1].str(); + int param_value = std::stoi(matches[2].str()); + + if (param_name == "world_size") { + world_size = param_value; + } else if (param_name == "tp") { + tp = param_value; + } else if (param_name == "pp") { + pp = param_value; + } else if (param_name == "ep") { + ep = param_value; + } else if (param_name == "gbs") { + gbs = param_value; + } else if (param_name == "mbs") { + mbs = param_value; + } else if (param_name == "seq") { + seq = param_value; } - std::regex param_regex(R"((world_size|tp|pp|ep|gbs|mbs|seq)(\d+))"); - std::smatch matches; - - std::string params = model_info; - while (std::regex_search(params, matches, param_regex)) { - std::string param_name = matches[1].str(); - int param_value = std::stoi(matches[2].str()); - - if (param_name == "world_size") { - world_size = param_value; - } else if (param_name == "tp") { - tp = param_value; - } else if (param_name == "pp") { - pp = param_value; - } else if (param_name == "ep") { - ep = param_value; - } else if (param_name == "gbs") { - gbs = param_value; - } else if (param_name == "mbs") { - mbs = param_value; - } else if (param_name == "seq") { - seq = param_value; - } - params = matches.suffix().str(); - } - - int dp = world_size / (tp * pp); - double ga = static_cast(gbs) / (dp * mbs); - - std::ostringstream result; - result << model_name << '-' - << "tp" << tp << '-' - << "pp" << pp << '-' - << "dp" << dp << '-' - << "ga" << static_cast(ga) << '-' - << "ep" << ep << '-' - << "NVL" << this->net_work_param.gpus_per_server << '-' - << "DP" << this->net_work_param.dp_overlap_ratio << '-' ; - if(this->res.back() == '/') { - this->res = this->res + result.str(); - } - else{ - this->res = result.str(); - } + params = matches.suffix().str(); } - return 0; + + + int dp = world_size / (tp * pp); + double ga = static_cast(gbs) / (dp * mbs); + + std::ostringstream result; + result << model_name << '-' + << "tp" << tp << '-' + << "pp" << pp << '-' + << "dp" << dp << '-' + << "ga" << static_cast(ga) << '-' + << "ep" << ep << '-' + << "NVL" << this->net_work_param.gpus_per_server << '-' + << std::fixed << std::setprecision(1) << (this->net_work_param.bw_per_nic * 8) << "G" << '-' + << "DP" << this->net_work_param.dp_overlap_ratio << '-' ; + + this->res = result.str(); + + + } + if (this->res_folder != "None"){ + if (this->res_folder.back() != '/'){ + this->res = this->res_folder + '/' + this->res; + } + else{ + this->res = this->res_folder + this->res; + } + } - ~UserParam(){} + return 0; +} + ~UserParam(){} }; -#endif // __ASTRAPARAMPARSE_HH__ +#endif // __ASTRAPARAMPARSE_HH__ \ No newline at end of file diff --git a/astra-sim-alibabacloud/astra-sim/system/Common.hh b/astra-sim-alibabacloud/astra-sim/system/Common.hh index 766ac231..35ec6d00 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Common.hh +++ b/astra-sim-alibabacloud/astra-sim/system/Common.hh @@ -11,7 +11,7 @@ LICENSE file in the root directory of this source tree. 
#include #include "AstraNetworkAPI.hh" -enum class GPUType { A100, A800, H100, H800, NONE }; +enum class GPUType { A100, A800, H100, H800, NONE, H20}; namespace AstraSim { #define CLOCK_PERIOD 1 diff --git a/astra-sim-alibabacloud/astra-sim/system/Sys.cc b/astra-sim-alibabacloud/astra-sim/system/Sys.cc index 32094747..4f1ab16c 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Sys.cc +++ b/astra-sim-alibabacloud/astra-sim/system/Sys.cc @@ -14,6 +14,7 @@ LICENSE file in the root directory of this source tree. #include "Common.hh" #include "RendezvousRecvData.hh" #include "RendezvousSendData.hh" +#include "calbusbw.h" #include "astra-sim/system/collective/AllToAll.hh" #include "astra-sim/system/collective/DoubleBinaryTreeAllReduce.hh" #include "astra-sim/system/collective/HalvingDoubling.hh" @@ -264,6 +265,11 @@ Sys::Sys( std::atexit(exiting); std::cout << "total nodes: " << total_nodes << std::endl; } + #ifdef ANALYTI + nic_ratio_data = readCSV(NIC_RATIO_PATH); + nvlink_ratio_data = readCSV(NVLINK_RATIO_PATH); + ata_ratio_data = readCSV(ATA_RATIO_PATH); + #endif NI->sim_init(MEM); memBus = new MemBus( "NPU", diff --git a/astra-sim-alibabacloud/astra-sim/system/Sys.hh b/astra-sim-alibabacloud/astra-sim/system/Sys.hh index 8c39cff7..2bbe3fda 100644 --- a/astra-sim-alibabacloud/astra-sim/system/Sys.hh +++ b/astra-sim-alibabacloud/astra-sim/system/Sys.hh @@ -148,7 +148,9 @@ class Sys : public Callable { int total_running_streams; std::map> active_Streams; std::map> stream_priorities; - + std::vector> nic_ratio_data; + std::vector> nvlink_ratio_data; + std::vector> ata_ratio_data; QueueLevels* vLevels; std::map logical_topologies; std::map>> diff --git a/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc b/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc new file mode 100644 index 00000000..403c317c --- /dev/null +++ b/astra-sim-alibabacloud/astra-sim/system/calbusbw.cc @@ -0,0 +1,463 @@ +#include +#include +#include +#include "calbusbw.h" +#include +#include +#include "astra-sim/system/AstraParamParse.hh" +char info[1024] = "Success!"; +int retcode = 0; + +float calculateAlgoBw(CalculationParameters params) { + return 0.0; +} + +float getNvlinkBw(GPUType node_type) { + float nvlink_bw = 0.0; + if (node_type == GPUType::H100 || node_type == GPUType::H20 ) { + nvlink_bw = SM90_NVLINK_BW * H100_NVLINKS; + } else if (node_type == GPUType::H800) { + nvlink_bw = SM90_NVLINK_BW * H800_NVLINKS; + } else if (node_type == GPUType::A100) { + nvlink_bw = SM80_NVLINK_BW * A100_NVLINKS; + } else if (node_type == GPUType::A800) { + nvlink_bw = SM80_NVLINK_BW * A800_NVLINKS; + } else { + strcpy(info, "Warning: unknown machine type. Please choose from H20, H100, H800, A100, A800."); + retcode = 1; + return -1; + } + return nvlink_bw; +} + +float getNicBw(char* nic_type) { + float nic_bw = 0.0; + if (strcmp(nic_type, "CX6") == 0 || strcmp(nic_type, "cx6") == 0) { + nic_bw = CX6_BW; + } else if (strcmp(nic_type, "CX7") == 0 || strcmp(nic_type, "cx7") == 0) { + nic_bw = CX7_BW; + } else if (strcmp(nic_type, "BF3") == 0 || strcmp(nic_type, "bf3") == 0) { + nic_bw = BF3_BW; + } else { + strcpy(info, "Warning: unknown NIC type. 
Please choose from CX6, CX7, BF3."); + retcode = 1; + return -1; + } + return nic_bw; +} + +float calcTreeBusBw(int gpus_per_node, int node_count, float nvlink_bw, float nic_bw, float nics_per_node, float all_gather_bus_bw) { + int nranks = gpus_per_node * node_count; + if (nranks == 1) return 5000.0; + if (node_count == 1) { + return all_gather_bus_bw * (gpus_per_node-1) / gpus_per_node; + } else { + float algbw_nic = nic_bw * nics_per_node; + if (node_count == 2) { + algbw_nic *= 2; + } else if (node_count == 3) { + algbw_nic *= (4.0/3.0); + } + if (gpus_per_node == 1) { + return algbw_nic * (nranks-1) / nranks; + } + float algbw_nvlink = nvlink_bw * gpus_per_node / (gpus_per_node-1); + return (algbw_nic < algbw_nvlink) ? algbw_nic * (nranks-1) / nranks : algbw_nvlink * (nranks-1) / nranks; + } +} + +float calcNVLSBusBw(int gpus_per_node, int node_count, float NVLS_bw, float nic_bw, float nics_per_node) { + int nranks = gpus_per_node * node_count; + + if (gpus_per_node != 8) return -1.0; + float algo_nvls_busbw = NVLS_bw * gpus_per_node / (gpus_per_node-1); + + if (node_count == 1) { + return algo_nvls_busbw * (nranks-1) / nranks; + } else { + float algbw_nic = nic_bw * nics_per_node; + if (node_count == 2) { + algbw_nic *= 2; + } else if (node_count == 3) { + algbw_nic *= (4.0/3.0); + } + if (gpus_per_node == 1) { + return algbw_nic * (nranks-1) / nranks; + } + return (algbw_nic < algo_nvls_busbw) ? algbw_nic * (nranks-1) / nranks : algo_nvls_busbw * (nranks-1) / nranks; + } +} + +int lower_compare(char *coll_type, const char *lower_str) { + //return strcasecmp(coll_type, lower_str); + char temp_str[strlen(coll_type) + 1]; + + for (int i = 0; i < strlen(coll_type); i++) { + temp_str[i] = tolower((unsigned char)coll_type[i]); + } + temp_str[strlen(coll_type)] = '\0'; + + if (strcmp(temp_str, lower_str) == 0) { + return 0; + } + return 1; +} + +float calculateBusBw(CalculationParameters* params) { + float nvlink_bw; + if (params->bw_intra > 0.0) { + nvlink_bw = params->bw_intra; + } else { + nvlink_bw = getNvlinkBw(params->node_type); + } + float nic_bw; + if (params->bw_per_nic > 0.0) { + nic_bw = params->bw_per_nic; + } else { + nic_bw = getNicBw(params->nic_type); + } + float all_gather_bus_bw = 0.0; + + int gpus_per_node = params->gpus_pernode; + int nics_per_node = params->nics_pernode; + float real_nics_per_node = params->real_nics_pernode; + int node_count = params->node_count; + int nranks = node_count * gpus_per_node; + params->is_nvlink = false; //nvlink or nic + if (nvlink_bw <= 0 || nic_bw <= 0 || gpus_per_node < 1 || nics_per_node < 1 || node_count < 1) { + return -1; + } + + if (real_nics_per_node * nic_bw > nvlink_bw) { + if (params->cross_nic == 2) params->cross_nic = 1; + } else { + if (params->cross_nic == 2) params->cross_nic = 0; + } + + if (node_count == 1) { + all_gather_bus_bw = nvlink_bw; + } else { + if (gpus_per_node == 1) { + all_gather_bus_bw = nic_bw * real_nics_per_node; + } else { + all_gather_bus_bw = (nvlink_bw < nic_bw * real_nics_per_node) ? (params->is_nvlink = true, nvlink_bw) : nic_bw * real_nics_per_node; + if (params->cross_nic == 1) { + params->is_nvlink = false; + all_gather_bus_bw = (nvlink_bw * gpus_per_node / (gpus_per_node-1) < nic_bw * real_nics_per_node) ? 
(params->is_nvlink = true, nvlink_bw * gpus_per_node / (gpus_per_node-1) ): nic_bw * real_nics_per_node; + } + } + } + + float tree_bus_bw = 0.0; + float nvls_bus_bw = 0.0; + tree_bus_bw = calcTreeBusBw(gpus_per_node, node_count, nvlink_bw, nic_bw, real_nics_per_node, all_gather_bus_bw); + if (params->node_type == GPUType::H100 || params->node_type == GPUType::H20) { + nvls_bus_bw = calcNVLSBusBw(gpus_per_node, node_count, H100_NVLS_BW, nic_bw, real_nics_per_node); + } else if (params->node_type == GPUType::H800) { + nvls_bus_bw = calcNVLSBusBw(gpus_per_node, node_count, H800_NVLS_BW, nic_bw, real_nics_per_node); + } + + + if (lower_compare(params->coll_type, "allreduce") == 0) { + if (lower_compare(params->nccl_algo, "ring") == 0) { + return all_gather_bus_bw; + } else if (lower_compare(params->nccl_algo, "tree") == 0) { + return tree_bus_bw; + } else if (lower_compare(params->nccl_algo, "nvls") == 0 || lower_compare(params->nccl_algo, "nvlstree") == 0) { + if (lower_compare(params->nccl_algo, "nvls") == 0 && node_count > 1) params->nccl_algo = "nvlstree"; + if (lower_compare(params->nccl_algo, "nvlstree") == 0 && node_count == 1) params->nccl_algo = "nvls"; + if (gpus_per_node == 8) { + if (params->node_type == GPUType::H100 || params->node_type == GPUType::H800|| params->node_type == GPUType::H20) { + return nvls_bus_bw; + } else { + strcpy(info, "Warning: unsupported machine type for NVLS algorithm. Please choose from H20,H100,H800."); + retcode = 1; + return -1; + } + } else { + strcpy(info, "Warning: unsupported GPU count for NVLS algorithm. Please use 8 GPUs per node."); + retcode = 1; + return -1; + } + } else { + if (nvls_bus_bw > tree_bus_bw) { + if (all_gather_bus_bw > nvls_bus_bw) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + } else { + if (node_count > 1) { + params->nccl_algo = strdup("NVLSTree"); + } else { + params->nccl_algo = strdup("NVLS"); + } + return nvls_bus_bw; + } + } else { + if (all_gather_bus_bw > tree_bus_bw) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + } else { + params->nccl_algo = "Tree"; + return tree_bus_bw; + } + } + } + + + } else if (lower_compare(params->coll_type, "allgather") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "alltoall") == 0) { + params->nccl_algo = "none"; + if (node_count == 1) { + params->is_nvlink = true; + return nvlink_bw; + } + return nic_bw * real_nics_per_node / gpus_per_node * (nranks-1) / ((node_count-1)*gpus_per_node) ; + + } else if (lower_compare(params->coll_type, "broadcast") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "reducescatter") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else if (lower_compare(params->coll_type, "reduce") == 0) { + params->nccl_algo = "Ring"; + return all_gather_bus_bw; + + } else { + strcpy(info, "Warning: unknown collective type. 
Please choose from allreduce, allgather, alltoall, broadcast, reducescatter, reduce, multiallreduce."); + retcode = 1; + return -1; + } + return -1; +} +std::vector> readCSV(const std::string &filePath) { + std::ifstream file(filePath); + if (!file.is_open()) { + throw std::runtime_error("Failed to open file"); + } + std::vector> data; + std::string line; + bool isFirstLine = true; + + + while (std::getline(file, line)) { + + if (isFirstLine) { + isFirstLine = false; + continue; + } + + std::stringstream lineStream(line); + std::string cell; + std::vector rowData; + while (std::getline(lineStream, cell, ',')) { + cell.erase(0, cell.find_first_not_of(' ')); + cell.erase(cell.find_last_not_of(' ') + 1); + if (cell.empty()) { + cell = "1"; + } + rowData.push_back(cell); + } + + if (!rowData.empty()) { + data.push_back(rowData); + } + } + + return data; +} +void printData(const std::vector> &data) { + for (const auto &row : data) { + for (const auto &cell : row) { + std::cout << cell << " "; + } + std::cout << std::endl; + } +} + + +BusBwResult cal_busbw(GPUType node_type,float bw_intra,float bw_per_nic, float nics_pernode,int node_count,char* coll_type,int gpus_pernode,char* nic_type) { + BusBwResult result; + CalculationParameters params; + memset(¶ms, 0, sizeof(params)); + retcode = 0; + params.node_count = node_count; + + params.gpus_pernode = gpus_pernode; + params.nics_pernode = nics_pernode; + params.bw_per_nic = bw_per_nic; + params.bw_intra = bw_intra; + params.group_split_mask = 0; + params.nccl_algo = "ring"; + params.cross_nic = 2; + params.coll_type = coll_type; + params.node_type = node_type; + params.nic_type = nic_type; + // if (argc > 1 && strcmp(argv[1], "--help") == 0) { + // print_usage(argv[0]); + // return 1; + // } + // for (int i = 1; i < argc; i++){ + // parseParams(argc, argv, &i, ¶ms); + // } + params.real_nics_pernode = (float)params.nics_pernode; + + if (params.node_count < 1) { + strcpy(info, "Error: The number of nodes must be greater than 0."); + retcode = 1; + } + if (lower_compare(params.nccl_algo, "none")) { + if (lower_compare(params.nccl_algo, "ring") && lower_compare(params.nccl_algo, "tree") && lower_compare(params.nccl_algo, "nvls") && lower_compare(params.nccl_algo, "nvlstree")) { + strcpy(info, "Warning: the selected algorithm is not supported."); + } + } + + if (params.group_split_mask != 0 && params.group_split_mask != 1 && params.group_split_mask != 3 && params.group_split_mask != 7) { + strcpy(info, "Warning: the value of group_split_mask can only be 0, 1, 3, 7. 
Default is 0."); + params.group_split_mask = 0; + } else if (params.group_split_mask != 0 && params.gpus_pernode != 8) { + // 当前只支持8GPU机型的multi- 测试 + strcpy(info, "Warning: currently, only 8GPU nodes are supported for split_mask testing."); + params.group_split_mask = 0; + } + + if (lower_compare(params.coll_type, "allreduce") && lower_compare(params.nccl_algo, "none") && lower_compare(params.nccl_algo, "ring")) { + strcpy(info, "Warning: only allreduce can use other algorithms except ring."); + params.nccl_algo = "Ring"; + } + + if (lower_compare(params.coll_type, "multiallreduce") == 0 || lower_compare(params.coll_type, "multialltoall") == 0) { + params.nccl_algo = "Ring"; + params.cross_nic = 2; + if (params.gpus_pernode == 8) { + params.group_split_mask = 7; + } else { + params.real_nics_pernode = (float)params.nics_pernode / params.gpus_pernode; + params.gpus_pernode = 1; + } + params.coll_type += strlen("multi"); + } + + if (params.group_split_mask == 7) { + params.gpus_pernode = 1; + params.real_nics_pernode = (float)params.nics_pernode / 8.0; + } else if (params.group_split_mask == 3) { + params.gpus_pernode = 2; + params.real_nics_pernode = (float)params.nics_pernode / 4.0; + } else if (params.group_split_mask == 1) { + params.gpus_pernode = 4; + params.real_nics_pernode = (float)params.nics_pernode / 2.0; + } + + if (params.gpus_pernode * params.node_count == 1) { + strcpy(info, "Warning: collective communication requires the participation of at least two gpus."); + retcode = 1; + } + + float busBw = 0.0; + + if (retcode == 0){ + busBw = calculateBusBw(¶ms); + } + + if (params.node_count == 1) { + params.cross_nic = 0; + } + + if (retcode == 1) { + printf("{\"retcode\":%d, \"info\":\"%s\", \"theoretical_bus_bw\":\"-1\", \"nccl_algo\":\"none\", \"cross_nic\":2}\n", retcode, info); + } else { + printf("{\"retcode\":%d, \"info\":\"%s\", \"node_count\":%d, \"nic_type\":\"%s\", \"gpus_pernode\":%d, \"nics_pernode\":%.1f, \"coll_type\":\"%s\", \"cross_nic\":%d, \"nccl_algo\":\"%s\", \"theoretical_bus_bw_GBps\":%.3lf}\n", retcode, info, params.node_count, params.nic_type, params.gpus_pernode, params.real_nics_pernode, params.coll_type, params.cross_nic, params.nccl_algo, busBw); + } + result.busbw = busBw; + + result.is_nvlink = params.is_nvlink; + return result; +} +struct DataRow { + std::string size; + std::vector values; +}; +double interpolate(double size, double size1, double size2, double value1, double value2) { + return value1 + (value2 - value1) * (size - size1) / (size2 - size1); +} +float getValue(double datasize, int _temp_nnode, const std::vector>& data) { + int colIndex = 0; + + if (_temp_nnode == 1) { + colIndex = 1; + } else if (_temp_nnode == 2) { + colIndex = 2; + } else if (_temp_nnode == 4) { + colIndex = 3; + } else if (_temp_nnode == 8) { + colIndex = 4; + } else if (_temp_nnode == 16) { + colIndex = 5; + } else if (_temp_nnode == 32) { + colIndex = 6; + } else if (_temp_nnode == 64) { + colIndex = 7; + } else if (_temp_nnode == 128) { + colIndex = 8; + } else if (_temp_nnode == 9) { + colIndex = 9; + } + else { + colIndex = 5; + } + if (datasize == 0) { + return 1.0; + } + double minSize = std::stod(data.front()[0]); + if (datasize < minSize) { + return std::stod(data.front()[colIndex])/std::stod(data.back()[colIndex]); + } + + for (size_t i = 0; i < data.size() - 1; ++i) { + double size1 = std::stod(data[i][0]); + double size2 = std::stod(data[i+1][0]); + if (datasize >= size1 && datasize <= size2) { + double value1 = std::stod(data[i][colIndex]); + double 
value2 = std::stod(data[i+1][colIndex]); + return interpolate(datasize, size1, size2, value1, value2)/std::stod(data.back()[colIndex]); + } + } + throw std::runtime_error("Data size out of range"); +} + +float cal_ratio(std::vector> nic_ratio_data,std::vector> nvlink_ratio_data,std::vector> ata_ratio_data,uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,char* group_type,char* coll_type,bool is_nvlink){ + if ((strcmp(coll_type, "allgather") == 0 || strcmp(coll_type, "reducescatter") == 0 ) && strcmp(group_type, "tp") == 0 ){ + auto data = is_nvlink ? nvlink_ratio_data : nic_ratio_data; + int _temp_nnode = (tp_size < gpus_per_server) ? 1 : tp_size / gpus_per_server ; + return getValue(data_size, _temp_nnode, data); + } else if (strcmp(coll_type, "alltoall") == 0 && strcmp(group_type, "ep") == 0){ + auto data = ata_ratio_data; + if(tp_size * nranks <= gpus_per_server){ + return getValue(data_size, 1, data); + }else if(tp_size >= gpus_per_server){ //multi + return getValue(data_size, 9, data); + } else { + int _temp_nnode = (tp_size * nranks) / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } else if (strcmp(coll_type, "alltoall") == 0 && strcmp(group_type, "tp") == 0){ + auto data = ata_ratio_data; + if (tp_size <= gpus_per_server){ + return getValue(data_size, 1, data); + } else { + int _temp_nnode = tp_size / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } + else if(strcmp(group_type, "dp") == 0 || strcmp(group_type, "dp_ep") == 0){ + return 1; + }else{ + return 1; + } +} \ No newline at end of file diff --git a/astra-sim-alibabacloud/astra-sim/system/calbusbw.h b/astra-sim-alibabacloud/astra-sim/system/calbusbw.h new file mode 100644 index 00000000..a6698227 --- /dev/null +++ b/astra-sim-alibabacloud/astra-sim/system/calbusbw.h @@ -0,0 +1,50 @@ +#ifndef CALBUSBW_H +#define CALBUSBW_H +#include "astra-sim/system/AstraParamParse.hh" +#define SM80_NVLINK_BW 20.0 +#define SM90_NVLINK_BW 20.6 +#define H100_NVLINKS 18 +#define H800_NVLINKS 8 +#define A100_NVLINKS 12 +#define A800_NVLINKS 8 + +#define CX6_BW 23.5 // 25 +#define CX7_BW 48.5 // 50 +#define BF3_BW 48.5 // 50 + +#define H100_NVLS_BW 475.0 +#define H800_NVLS_BW 215.0 + +#define H800_PCIE_BW 51.2 // 64*0.8 +#define H100_PCIE_BW 51.2 // 64*0.8 +#define A100_PCIE_BW 25.6 // 32*0.8 +#define A800_PCIE_BW 25.6 // 32*0.8 +#define NIC_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv" +#define NVLINK_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv" +#define ATA_RATIO_PATH "astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv" +typedef struct { + GPUType node_type; + int node_count; + char* nic_type; + char* coll_type; + int cross_nic; + char* nccl_algo; + int gpus_pernode; + float nics_pernode; + float bw_per_nic; + float bw_intra; + int group_split_mask; + float real_nics_pernode; + bool is_nvlink; +} CalculationParameters; + +typedef struct { + float busbw; + int is_nvlink; +} BusBwResult; + +BusBwResult cal_busbw(GPUType node_type, float bw_intra, float bw_per_nic, float nics_pernode, int node_count, char* coll_type, int gpus_pernode, char* nic_type); +float cal_ratio(std::vector> nic_ratio_data,std::vector> nvlink_ratio_data,std::vector> ata_ratio_data,uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,char* group_type,char* coll_type,bool is_nvlink); +std::vector> readCSV(const std::string &filePath); +float getValue(double datasize, int _temp_nnode, const std::vector>& data); +#endif // CALBUSBW_H \ No newline at end of file 
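The new `cal_busbw()` entry point declared above replaces the per-group bus-bandwidth values that used to be read from `busbw.yaml`: given the GPU and NIC types (or explicit `-nv`/`-nic` overrides), the server topology, and a collective type, it returns a theoretical bus bandwidth plus a flag indicating whether NVLink or the NICs are the bottleneck. The following is a minimal sketch of how a caller might invoke it; it is not part of the patch, and the 4-node H100/CX7 values are illustrative assumptions only.

```cpp
// Illustrative only: exercising the new bus-bandwidth model directly.
// Assumes this is compiled inside the repo so the header path resolves.
#include <cstdio>
#include "astra-sim/system/calbusbw.h"

int main() {
  // Passing -1 for bw_intra / bw_per_nic mirrors the NetWorkParam defaults
  // (nvlink_bw = bw_per_nic = -1.0), so the model falls back to the tabulated
  // per-GPU NVLink bandwidth and the named NIC type.
  BusBwResult r = cal_busbw(GPUType::H100,
                            /*bw_intra=*/-1.0f,
                            /*bw_per_nic=*/-1.0f,
                            /*nics_pernode=*/8.0f,
                            /*node_count=*/4,
                            const_cast<char*>("allreduce"),
                            /*gpus_pernode=*/8,
                            const_cast<char*>("cx7"));
  std::printf("theoretical busbw = %.3f GB/s, bottleneck = %s\n",
              r.busbw, r.is_nvlink ? "NVLink" : "NIC");
  return 0;
}
```

In `Layer::compute_time` (next hunk) the same call is made with a node count and GPUs-per-server derived from the TP/EP/DP group sizes, and the returned bandwidth is then scaled by the message-size efficiency ratios loaded from the CSV tables added at the end of this patch.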
diff --git a/astra-sim-alibabacloud/astra-sim/workload/Layer.cc b/astra-sim-alibabacloud/astra-sim/workload/Layer.cc old mode 100755 new mode 100644 index d4dd1c7e..ce2363de --- a/astra-sim-alibabacloud/astra-sim/workload/Layer.cc +++ b/astra-sim-alibabacloud/astra-sim/workload/Layer.cc @@ -8,7 +8,9 @@ LICENSE file in the root directory of this source tree. #include "astra-sim/system/IntData.hh" #include "astra-sim/system/MockNcclLog.h" #include "astra-sim/system/AstraParamParse.hh" - +// #ifdef ANALYTI +#include "astra-sim/system/calbusbw.h" +// #endif #ifdef NS3_MPI #include "ns3/mpi-interface.h" @@ -499,7 +501,7 @@ LayerData Layer::report( data = "total exposed comm," + to_string(total_exposed) + ",total comp," + to_string(total_compute) + ",total time," + to_string(total_time); EndToEnd->write_line(data); - Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ ); + Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp_overlap_ratio) * 1e9) / FREQ ); Expose_PP_time *= (1-param->net_work_param.pp_overlap_ratio) ; //pp bubble time pre_bubble_time *= static_cast(PP_size - 1) / (GA * vpp); @@ -729,7 +731,7 @@ LayerData Layer::report( total_exposed = (((double)Sys::boostedTick()) / FREQ ) - total_compute; } //pp commtime - Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp) * 1e9) / FREQ ); + Tick Expose_PP_time = (2 * vpp * GA * (pp_commsize * GBps / (param->net_work_param.pp_overlap_ratio) * 1e9) / FREQ ); Expose_PP_time *= (1-param->net_work_param.pp_overlap_ratio) ; //pp bubble time pre_bubble_time *= static_cast(PP_size - 1) / (GA * vpp); @@ -852,6 +854,67 @@ static std::pair binarySearch(const std::vector& arr, long targe return std::make_pair(leftIndex, rightIndex); } +char* comtype_to_coll(ComType comtype) { + switch (comtype) { + case ComType::None: + return "none"; + case ComType::Reduce_Scatter: + return "reducescatter"; + case ComType::All_Gather: + return "allgather"; + case ComType::All_Reduce: + return "allreduce"; + case ComType::All_to_All: + return "alltoall"; + case ComType::All_Reduce_All_to_All: + return "all_reduce_all_to_all"; + case ComType::All_Reduce_NVLS: + return "all_reduce_nvls"; + default: + return "unknown"; + } +} +float Layer::cal_ratio( + uint64_t data_size, + int nranks, + int tp_size, + uint32_t gpus_per_server, + MockNccl::GroupType group_type, + char* coll_type, + bool is_nvlink){ + UserParam* param = UserParam::getInstance(); + auto nic_ratio_data = generator->nic_ratio_data; + auto nvlink_ratio_data = generator->nvlink_ratio_data; + auto ata_ratio_data = generator->ata_ratio_data; + if ((strcmp(coll_type, "allgather") == 0 || strcmp(coll_type, "reducescatter") == 0 ) && group_type == MockNccl::GroupType::TP){ + auto data = is_nvlink ? nvlink_ratio_data : nic_ratio_data; + int _temp_nnode = (tp_size < gpus_per_server) ? 
1 : tp_size / gpus_per_server ; + return getValue(data_size, _temp_nnode, data); + } else if (strcmp(coll_type, "alltoall") == 0 && group_type == MockNccl::GroupType::EP){ + auto data = ata_ratio_data; + if(tp_size * nranks <= gpus_per_server){ + return getValue(data_size, 1, data); + }else if(tp_size >= gpus_per_server){ //multi + return getValue(data_size, 9, data); + } else { + int _temp_nnode = (tp_size * nranks) / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } else if (strcmp(coll_type, "alltoall") == 0 && group_type == MockNccl::GroupType::TP){ + auto data = ata_ratio_data; + if (tp_size <= gpus_per_server){ + return getValue(data_size, 1, data); + } else { + int _temp_nnode = tp_size / gpus_per_server; + return getValue(data_size, _temp_nnode, data); + } + } + else if(group_type == MockNccl::GroupType::DP || group_type == MockNccl::GroupType::DP_EP){ + return 1; + }else{ + return 1; + } +} Tick Layer::compute_time( ComType comtype, int tp_size, @@ -866,102 +929,86 @@ Tick Layer::compute_time( return 0; } - bool DP_comm_inside = false; - bool TP_comm_inside = false; - bool EP_comm_inside = false; + int n_ranks; int nnics; uint32_t gpus_per_server = param->net_work_param.gpus_per_server; GPUType gpu_type = param->net_work_param.gpu_type; - float tp_ar = param->net_work_param.tp_ar; - float tp_ag = param->net_work_param.tp_ag; - float tp_ata = param->net_work_param.tp_ata; - float ep_ata = param->net_work_param.ep_ata; - float dp_ag = param->net_work_param.dp_ag; - float ep_ag = param->net_work_param.ep_ag; - float dp_ar = param->net_work_param.dp_ar; - float ep_ar = param->net_work_param.ep_ar; - if (group_type == MockNccl::GroupType::TP || group_type == MockNccl::GroupType::EP) { - n_ranks = tp_size; - if (n_ranks <= gpus_per_server) - TP_comm_inside = true; - } else if ( - group_type == MockNccl::GroupType::DP || - group_type == MockNccl::GroupType::EP || group_type == MockNccl::GroupType::DP_EP) { - n_ranks = nranks; - nnics = gpus_per_server / tp_size; - if (all_gpus == gpus_per_server && tp_size <= gpus_per_server) - DP_comm_inside = true; - - } - if (TP_comm_inside || DP_comm_inside) { - if (comtype == ComType::All_Reduce) { + float nvlink_bw = param->net_work_param.nvlink_bw; + float bw_per_nic = param->net_work_param.bw_per_nic; + uint32_t nics_per_server = param->net_work_param.nics_per_server; + char* nic_type = param->net_work_param.nic_type; + char* coll_type = comtype_to_coll(comtype); + float bw_ratio = 1.0; + BusBwResult result; - comp_time = data_size * GBps / tp_ar * 1e9 * 2 * //tp2 ep8 164.8 tp16 218 - (nranks - 1) / (nranks / 1.0); + if (1 < data_size && data_size < 1048576){ + if(nranks == 2) comp_time = 10000; + if(nranks == 4) comp_time = 12000; + if(nranks == 8) comp_time = 15000; + if(nranks == 16) comp_time = 66000; + if(nranks == 32) comp_time = 135000; + if(nranks == 64) comp_time = 200000; + if(nranks == 128) comp_time = 320000; + return comp_time; + } + if (group_type == MockNccl::GroupType::TP ){ + //TP_comm_inside + if(tp_size <= gpus_per_server){ + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,1,coll_type,tp_size,nic_type); + }else{ + int _node_count = tp_size / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,_node_count,coll_type,gpus_per_server,nic_type); } - else if (group_type == MockNccl::GroupType::TP && ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter )) { - comp_time = data_size * GBps / tp_ag * 1e9 * - (nranks - 1) / (nranks / 1.0); - } 
else if (group_type == MockNccl::GroupType::TP && ( - comtype == ComType::All_to_All)) { - comp_time = data_size * GBps / tp_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - }else if (group_type == MockNccl::GroupType::EP && - comtype == ComType::All_to_All) { + }else if (group_type == MockNccl::GroupType::EP && nranks > 1) + { + if(tp_size * nranks <= gpus_per_server){ + uint32_t _temp_gpus_per_server = gpus_per_server / tp_size; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,nics_per_server,1,coll_type,_temp_gpus_per_server,nic_type); - comp_time = data_size * GBps / ep_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - - }else { - comp_time = 0; - } - } else if (!TP_comm_inside && group_type == MockNccl::GroupType::TP) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / - tp_ar * 1e9 * 2 * - (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter) { - comp_time = data_size * GBps / - tp_ag * 1e9 * - (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_to_All) { - comp_time = data_size * GBps / - tp_ata * 1e9 * - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; - } - } else if ( - !DP_comm_inside && - (group_type == MockNccl::GroupType::DP)) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / dp_ar * 1e9 * - 2 * (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter || comtype == ComType::All_to_All) { - comp_time = data_size * GBps / dp_ag * 1e9 * //tp2 ep8 48.5 - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; + }else{ + int _node_count = (tp_size * nranks) / gpus_per_server; + uint32_t _temp_gpus_per_server = (gpus_per_server / tp_size > 1) ? (gpus_per_server / tp_size) : 1; + float _temp_nics_per_server = (tp_size > gpus_per_server) ? 
(nics_per_server / gpus_per_server) : (nics_per_server / tp_size); + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,_node_count,coll_type,_temp_gpus_per_server,nic_type); + } + }else if(group_type == MockNccl::GroupType::DP && nranks > 1){ + if(tp_size <= gpus_per_server){ + uint32_t _temp_gpus_per_server = gpus_per_server / tp_size; + float _temp_nics_per_server = nics_per_server / tp_size; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,_temp_gpus_per_server,nic_type); + }else{ + float _temp_nics_per_server = nics_per_server / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,1,nic_type); } - }else if ( - !DP_comm_inside && - ( group_type == MockNccl::GroupType::DP_EP)) { - if (comtype == ComType::All_Reduce) { - comp_time = data_size * GBps / ep_ar* 1e9 * - 2 * (nranks - 1) / (nranks / 1.0); - } else if ( - comtype == ComType::All_Gather || comtype == ComType::Reduce_Scatter || comtype == ComType::All_to_All) { - comp_time = data_size * GBps / ep_ag * 1e9 * //tp2 ep8 48.5 - (nranks - 1) / (nranks / 1.0); - } else { - comp_time = 0; + }else if(group_type == MockNccl::GroupType::DP_EP && nranks > 1){ + if(tp_size * ep_size <= gpus_per_server){ + float _temp_nics_per_server = nics_per_server / (tp_size * ep_size); + uint32_t _temp_gpus_per_server = gpus_per_server / (tp_size * ep_size); + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,_temp_gpus_per_server,nic_type); + + }else{ + float _temp_nics_per_server = nics_per_server / gpus_per_server; + result = cal_busbw(gpu_type,nvlink_bw,bw_per_nic,_temp_nics_per_server,nranks,coll_type,1,nic_type); } + }else{ + + comp_time = 0; + return comp_time; } + + bw_ratio = cal_ratio(data_size,nranks,tp_size,gpus_per_server,group_type,coll_type,result.is_nvlink); + cout<<"Communication Type: "<& involved_dimensions); + float cal_ratio(uint64_t data_size,int nranks,int tp_size,uint32_t gpus_per_server,MockNccl::GroupType group_type,char* coll_type,bool is_nvlink); std::pair compute_busbw(ComType comtype, int nranks,uint64_t data_size,Tick total_comm); Tick compute_time(ComType comtype, int tp_size,int nranks , uint64_t data_size, MockNccl::GroupType group_type, int all_gpus,int ep_size); }; diff --git a/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv new file mode 100644 index 00000000..2866883a --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/ata_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.44,0.52,0.67,0.64,0.62,,,,0.86,0.86,0.77, +33554432,0.51,0.63,0.68,0.67,0.66,,,,0.92,0.9,0.8, +67108864,0.6,0.72,0.83,0.82,0.66,,,,0.94,0.91,0.83, +134217728,0.65,0.76,0.9,0.85,0.72,,,,0.94,0.92,0.87, +268435456,0.7,0.79,0.92,0.87,0.75,,,,0.94,0.93,0.91,0.9 +536870912,0.71,0.8,0.94,0.89,0.81,,,,0.95,0.94,0.93,0.95 +1073741824,0.74,0.81,0.94,0.92,0.85,,,,0.96,0.95,0.94,0.95 +2147483648,0.76,0.81,0.95,0.93,0.88,,,,0.96,0.96,0.95,0.95 +4294967296,0.77,0.8,0.95,0.94,0.89,,,,0.96,0.96,0.95,0.95 +8589934592,0.78,0.8,0.95,0.94,0.9,,,,0.96,0.96,0.96,0.95 +17179869184,0.78,0.8,0.95,0.94,0.9,,,,0.96,0.96,0.96,0.95 \ No newline at end of file diff --git a/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv new file mode 100644 
index 00000000..14951e6e --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/nic_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.45,0.32,0.24,0.13,0.05,0.044,0.021,0.0087,0.86,0.86,0.77, +33554432,0.55,0.47,0.38,0.24,0.11,0.052,0.042,0.021225,0.92,0.9,0.8, +67108864,0.65,0.5,0.53,0.39,0.21,0.106,0.051,0.04405,0.94,0.91,0.83, +134217728,0.69,0.67,0.67,0.54,0.38,0.205,0.107,0.0533,0.94,0.92,0.87, +268435456,0.73,0.85,0.77,0.69,0.57,0.387,0.209,0.1027,0.94,0.93,0.91,0.9 +536870912,0.76,0.89,0.85,0.78,0.73,0.562,0.384,0.1902,0.95,0.94,0.93,0.95 +1073741824,0.77,0.92,0.93,0.9,0.76,0.74,0.563,0.314575,0.96,0.95,0.94,0.95 +2147483648,0.78,0.94,0.94,0.94,0.92,0.758,0.745,0.553525,0.96,0.96,0.95,0.95 +4294967296,0.79,0.95,0.95,0.95,0.95,0.923,0.755,0.740175,0.96,0.96,0.95,0.95 +8589934592,0.8,0.95,0.96,0.96,0.96,0.953,0.914,0.7648,0.96,0.96,0.96,0.95 +17179869184,0.81,0.96,0.96,0.96,0.96,0.957,0.958,0.919175,0.96,0.96,0.96,0.95 \ No newline at end of file diff --git a/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv b/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv new file mode 100644 index 00000000..e4a8eaaf --- /dev/null +++ b/astra-sim-alibabacloud/inputs/ratio/nvlink_ratio.csv @@ -0,0 +1,12 @@ +Size,1 Node(NVLinkx),2Node-16GPU(NIC ),4Node-32GPU(NIC),8Node-64GPU(NIC),16Node-128GPU(NIC),32Node-256GPU(NIC),64Node-512GPU(NIC),128Node-1024GPU(NIC),Multi 16,Multi 32,Muiti 64,Multi (NIC) +16777216,0.45,0.55,0.34,0.16,0.08,,,,0.86,0.86,0.77, +33554432,0.55,0.68,0.6,0.34,0.16,,,,0.92,0.9,0.8, +67108864,0.65,0.74,0.71,0.62,0.33,,,,0.94,0.91,0.83, +134217728,0.69,0.8,0.74,0.7,0.65,,,,0.94,0.92,0.87, +268435456,0.73,0.84,0.79,0.78,0.72,,,,0.94,0.93,0.91,0.9 +536870912,0.76,0.86,0.86,0.82,0.75,,,,0.95,0.94,0.93,0.95 +1073741824,0.77,0.87,0.87,0.86,0.83,,,,0.96,0.95,0.94,0.95 +2147483648,0.78,0.87,0.87,0.87,0.86,,,,0.96,0.96,0.95,0.95 +4294967296,0.79,0.88,0.88,0.88,0.87,,,,0.96,0.96,0.95,0.95 +8589934592,0.8,0.88,0.88,0.88,0.88,,,,0.96,0.96,0.96,0.95 +17179869184,0.81,0.88,0.88,0.88,0.88,,,,0.96,0.96,0.96,0.95 \ No newline at end of file
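The three ratio tables above feed `getValue()`, which selects a column from the node count, linearly interpolates between the two message-size rows that bracket the request, and normalizes by the column's largest-size (asymptotic) entry; `Layer::compute_time` multiplies the theoretical bus bandwidth from `cal_busbw()` by this factor before converting bytes to time. Below is a hedged sketch of that lookup, not part of the patch; the 50 MiB / 2-node example and running from the repository root are assumptions.

```cpp
// Illustrative only: querying the new nic_ratio.csv the way Layer::compute_time
// does for an inter-node allgather/reducescatter in a TP group.
// Run from the repository root so the relative NIC_RATIO_PATH resolves.
#include <cstdio>
#include "astra-sim/system/calbusbw.h"

int main() {
  auto nic = readCSV(NIC_RATIO_PATH);
  // 50 MiB falls between the 32 MiB (0.47) and 64 MiB (0.50) rows of the
  // 2-node column, so getValue interpolates to ~0.487 and divides by the
  // 16 GiB entry (0.96), giving an efficiency factor of ~0.507.
  float ratio = getValue(50.0 * 1024 * 1024, /*_temp_nnode=*/2, nic);
  std::printf("efficiency ratio ~= %.3f\n", ratio);
  // The modeled communication time then scales as
  //   data_size * (nranks - 1) / nranks / (cal_busbw(...).busbw * ratio).
  return 0;
}
```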