diff --git a/.gitignore b/.gitignore index ef99bb26..a847bc2e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ *.htm *.html *.dll -*.txt *.patch *.orig *.out diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d62fac82..03d47db4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,6 +17,49 @@ build_linux:gcc5: - g++ --version - make -j +build_linux:gcc10: + image: "ubuntu:groovy" + stage: build + before_script: + - apt-get update -qq && apt-get install -qq -y make g++ + script: + - g++ --version + - make -j + +build_linux:clang_scan: + image: "ubuntu:groovy" + stage: build + before_script: + - apt-get update -qq && apt-get install -qq -y make clang clang-tools perl g++ + script: + - scan-build --status-bugs make -j + +build_windows: + stage: build + before_script: + - 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"' + script: + - 'msbuild pcm-all.sln /p:Configuration=Release;Platform=x64 /t:Clean,Build /m' + tags: + - windows + +cppcheck: + image: "ubuntu:groovy" + stage: build + before_script: + - apt-get update -qq && apt-get install -qq -y cppcheck + script: + - sh cppcheck.sh . 28 + +build_linux:gcc9: + image: "ubuntu:focal" + stage: build + before_script: + - apt-get update -qq && apt-get install -qq -y make g++ + script: + - g++ --version + - make -j + build_linux:gcc7: image: "ubuntu:bionic" stage: build diff --git a/ENVVAR_README.md b/ENVVAR_README.md index 9abc2f01..a6153c18 100644 --- a/ENVVAR_README.md +++ b/ENVVAR_README.md @@ -3,3 +3,5 @@ `PCM_USE_UNCORE_PERF=1` : use Linux perf events API to program *uncore* PMUs (default is *not* to use it) `PCM_NO_RDT=1` : don't use RDT metrics for a better interoperation with pqos utility (https://github.com/intel/intel-cmt-cat) + +`PCM_USE_RESCTRL=1` : use Linux resctrl driver for RDT metrics diff --git a/Makefile b/Makefile index 7acebfb9..3f450954 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,8 @@ ifeq ($(UNAME), Linux) EXE += daemon-binaries endif -CXXFLAGS += -Wall -g -O3 -Wno-unknown-pragmas -std=c++11 -fPIC +CFLAGS += -Wall -g -O3 -Wno-unknown-pragmas -fPIC +CXXFLAGS += $(CFLAGS) -std=c++11 # uncomment if your Linux kernel supports access to /dev/mem from user space # CXXFLAGS += -DPCM_USE_PCI_MM_LINUX @@ -50,7 +51,7 @@ CXX=c++ LIB= -lpthread -lc++ endif -COMMON_OBJS = msr.o cpucounters.o pci.o mmio.o client_bw.o utils.o topology.o dashboard.o debug.o threadpool.o +COMMON_OBJS = msr.o cpucounters.o pci.o mmio.o client_bw.o utils.o topology.o dashboard.o debug.o threadpool.o resctrl.o EXE_OBJS = $(EXE:.x=.o) OBJS = $(COMMON_OBJS) $(EXE_OBJS) @@ -83,10 +84,10 @@ libpcm.so: $(COMMON_OBJS) pcm-core.o $(CXX) $(LDFLAGS) $(CXXFLAGS) -DPCM_SILENT -shared $^ $(LIB) -o $@ c_example.x: c_example.c libpcm.so - $(CC) -DPCM_DYNAMIC_LIB $< -ldl -Wl,-rpath,$(shell pwd) -o $@ + $(CC) $(CFLAGS) -DPCM_DYNAMIC_LIB $< -ldl -Wl,-rpath,$(shell pwd) -o $@ c_example_shlib.x: c_example.c libpcm.so - $(CC) $< -L./ -Wl,-rpath,$(shell pwd) -lpcm -o $@ + $(CC) $(CFLAGS) $< -L./ -Wl,-rpath,$(shell pwd) -lpcm -o $@ %.o: %.cpp $(CXX) $(CXXFLAGS) -c $*.cpp -o $*.o @@ -106,14 +107,11 @@ c_example_shlib.x: c_example.c libpcm.so @rm -f $*.d.tmp memoptest.x: memoptest.cpp - g++ -Wall -g -O0 -std=c++11 memoptest.cpp -o memoptest.x + $(CXX) -Wall -g -O0 -std=c++11 memoptest.cpp -o memoptest.x dashboardtest.x: dashboardtest.cpp $(COMMON_OBJS) $(CXX) -o $@ $^ $(LIB) -nice: - uncrustify --replace -c ~/uncrustify.cfg *.cpp *.h WinMSRDriver/Win7/*.h WinMSRDriver/Win7/*.c WinMSRDriver/WinXP/*.h WinMSRDriver/WinXP/*.c PCM_Win/*.h PCM_Win/*.cpp - prefix=/usr ifneq ($(DESTDIR),) diff --git a/c_example.c b/c_example.c index cae942e0..f3dad6e4 100644 --- a/c_example.c +++ b/c_example.c @@ -1,16 +1,19 @@ #include #include #include +#include int pcm_getcpu() { - int id = -1; - asm volatile ( - "rdtscp\n\t" - "mov %%ecx, %0\n\t": - "=r" (id) :: "%rax", "%rcx", "%rdx"); - // processor ID is in ECX: https://www.felixcloutier.com/x86/rdtscp - return id; + int id = -1; + asm volatile ( + "rdtscp\n\t" + "mov %%ecx, %0\n\t": + "=r" (id) :: "%rax", "%rcx", "%rdx"); + // processor ID is in ECX: https://www.felixcloutier.com/x86/rdtscp + // Linux encodes the NUMA node starting at bit 12, so remove the NUMA + // bits when returning the CPU integer by masking with 0xFFF. + return id & 0xFFF; } struct { @@ -38,8 +41,17 @@ uint64_t pcm_c_get_core_event(uint32_t, uint32_t); int main(int argc, const char *argv[]) { int i,a[100],b[100],c[100]; + uint32_t total = 0; int lcore_id; + /* Seed for predictable rand() results */ + srand(0); + for (i=0; i < 100; ++i) { + a[i] = rand(); + b[i] = rand(); + c[i] = rand(); + } + #ifdef PCM_DYNAMIC_LIB void * handle = dlopen("libpcm.so", RTLD_LAZY); if(!handle) { @@ -85,12 +97,19 @@ int main(int argc, const char *argv[]) return -2; } + printf("[c_example] Initializing PCM measurements:\n"); PCM.pcm_c_init(); + + printf("[c_example] Calling PCM start()\n"); PCM.pcm_c_start(); for(i=0;i<10000;i++) - c[i%100] = 4 * a[i%100] + b[i%100]; + c[i%100] = 4 * a[i%100] + b[i%100]; + for(i=0;i<100;i++) + total += c[i]; PCM.pcm_c_stop(); + printf("[c_example] PCM measurment stopped, compute result %u\n", total); + lcore_id = pcm_getcpu(); printf("C:%lu I:%lu, IPC:%3.2f\n", PCM.pcm_c_get_cycles(lcore_id), diff --git a/cppcheck.sh b/cppcheck.sh new file mode 100644 index 00000000..263f41de --- /dev/null +++ b/cppcheck.sh @@ -0,0 +1,11 @@ + +cppcheck $1 --force --enable=warning --inline-suppr -iPCM-Service_Win -j $2 2> cppcheck.out + +if [ -s cppcheck.out ] +then + cat cppcheck.out + exit 1 +fi + +echo No issues found + diff --git a/cpuasynchcounter.h b/cpuasynchcounter.h index ca44ddca..c294330c 100644 --- a/cpuasynchcounter.h +++ b/cpuasynchcounter.h @@ -45,8 +45,8 @@ class AsynchronCounterState { friend void * UpdateCounters(void *); -// AsynchronCounterState(const& AsynchronCounterState); //unimplemeted -// const& AsynchronCounterState operator=(const& AsynchronCounterState); //unimplemented + AsynchronCounterState(const AsynchronCounterState &) = delete; + const AsynchronCounterState & operator = (const AsynchronCounterState &) = delete; public: AsynchronCounterState() diff --git a/cpucounters.cpp b/cpucounters.cpp index 461bf285..e71f95a9 100644 --- a/cpucounters.cpp +++ b/cpucounters.cpp @@ -281,6 +281,7 @@ PCM * PCM::getInstance() uint32 build_bit_ui(uint32 beg, uint32 end) { + assert(end <= 31); uint32 myll = 0; if (end == 31) { @@ -353,6 +354,8 @@ uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) uint64 PCM::extractCoreGenCounterValue(uint64 val) { + if (canUsePerf) return val; + if(core_gen_counter_width) return extract_bits(val, 0, core_gen_counter_width-1); @@ -361,6 +364,8 @@ uint64 PCM::extractCoreGenCounterValue(uint64 val) uint64 PCM::extractCoreFixedCounterValue(uint64 val) { + if (canUsePerf) return val; + if(core_fixed_counter_width) return extract_bits(val, 0, core_fixed_counter_width-1); @@ -646,7 +651,9 @@ bool PCM::isRDTDisabled() const bool PCM::QOSMetricAvailable() const { if (isRDTDisabled()) return false; - if (isSecureBoot()) return false; // TODO: use perf rdt driver +#ifndef __linux__ + if (isSecureBoot()) return false; +#endif PCM_CPUID_INFO cpuinfo; pcm_cpuid(0x7,0,cpuinfo); return (cpuinfo.reg.ebx & (1<<12))?true:false; @@ -655,7 +662,9 @@ bool PCM::QOSMetricAvailable() const bool PCM::L3QOSMetricAvailable() const { if (isRDTDisabled()) return false; - if (isSecureBoot()) return false; // TODO:: use perf rdt driver +#ifndef __linux__ + if (isSecureBoot()) return false; +#endif PCM_CPUID_INFO cpuinfo; pcm_cpuid(0xf,0,cpuinfo); return (cpuinfo.reg.edx & (1<<1))?true:false; @@ -672,7 +681,7 @@ bool PCM::L3CacheOccupancyMetricAvailable() const bool PCM::CoreLocalMemoryBWMetricAvailable() const { - if (cpu_model == SKX) return false; // SKZ4 errata + if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -682,7 +691,7 @@ bool PCM::CoreLocalMemoryBWMetricAvailable() const bool PCM::CoreRemoteMemoryBWMetricAvailable() const { - if (cpu_model == SKX) return false; // SKZ4 errata + if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -699,10 +708,34 @@ unsigned PCM::getMaxRMID() const return maxRMID; } -void PCM::initRMID() +void PCM::initRDT() { if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return; +#ifdef __linux__ + auto env = std::getenv("PCM_USE_RESCTRL"); + if (env != nullptr && std::string(env) == std::string("1")) + { + std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because environment variable PCM_USE_RESCTRL=1\n"; + resctrl.init(); + useResctrl = true; + return; + } + if (resctrl.isMounted()) + { + std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because resctrl driver is mounted.\n"; + resctrl.init(); + useResctrl = true; + return; + } + if (isSecureBoot()) + { + std::cout << "INFO: using Linux resctrl driver for RDT metrics (L3OCC, LMB, RMB) because Secure Boot mode is enabled.\n"; + resctrl.init(); + useResctrl = true; + return; + } +#endif unsigned maxRMID; /* Calculate maximum number of RMID supported by socket */ maxRMID = getMaxRMID(); @@ -814,11 +847,7 @@ void PCM::initCStateSupportTables() PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: case BROADWELL: - case SKL: - case SKL_UY: - case KBL: - case KBL_1: - case ICL: + PCM_SKL_PATH_CASES case BROADWELL_XEON_E3: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0x3F8, 0, 0, 0x3F9, 0x3FA, 0x630, 0x631, 0x632}) ); @@ -858,11 +887,7 @@ void PCM::initCStateSupportTables() case CHERRYTRAIL: case APOLLO_LAKE: case DENVERTON: - case SKL_UY: - case SKL: - case KBL: - case KBL_1: - case ICL: + PCM_SKL_PATH_CASES PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FF, 0, 0, 0, 0}) ); @@ -1404,6 +1429,7 @@ void PCM::printSystemTopology() const { std::cerr << "Physical cores per socket: " << num_phys_cores_per_socket << "\n"; } + std::cerr << "Last level cache slices per socket: " << getMaxNumOfCBoxes() << "\n"; std::cerr << "Core PMU (perfmon) version: " << perfmon_version << "\n"; std::cerr << "Number of core PMU generic (programmable) counters: " << core_gen_counter_num_max << "\n"; std::cerr << "Width of generic (programmable) counters: " << core_gen_counter_width << " bits\n"; @@ -1470,9 +1496,7 @@ bool PCM::detectNominalFrequency() || cpu_model == AVOTON || cpu_model == APOLLO_LAKE || cpu_model == DENVERTON - || cpu_model == SKL - || cpu_model == KBL - || cpu_model == ICL + || useSKLPath() || cpu_model == KNL || cpu_model == SKX ) ? (100000000ULL) : (133333333ULL); @@ -1896,6 +1920,10 @@ PCM::PCM() : allow_multiple_instances(false), programmed_pmu(false), joulesPerEnergyUnit(0), +#ifdef __linux__ + resctrl(*this), +#endif + useResctrl(false), disable_JKT_workaround(false), blocked(false), coreCStateMsr(NULL), @@ -1953,8 +1981,7 @@ PCM::PCM() : initUncoreObjects(); - // Initialize RMID to the cores for QOS monitoring - initRMID(); + initRDT(); readCPUMicrocodeLevel(); @@ -2028,7 +2055,7 @@ bool PCM::isSocketOnline(int32 socket_id) const return socketRefCore[socket_id] != -1; } -bool PCM::isCPUModelSupported(int model_) +bool PCM::isCPUModelSupported(const int model_) { return ( model_ == NEHALEM_EP || model_ == NEHALEM_EX @@ -2047,8 +2074,12 @@ bool PCM::isCPUModelSupported(int model_) || model_ == BROADWELL || model_ == KNL || model_ == SKL + || model_ == SKL_UY || model_ == KBL + || model_ == KBL_1 + || model_ == CML || model_ == ICL + || model_ == TGL || model_ == SKX ); } @@ -2059,9 +2090,9 @@ bool PCM::checkModel() if (cpu_model == ATOM_2) cpu_model = ATOM; if (cpu_model == HASWELL_ULT || cpu_model == HASWELL_2) cpu_model = HASWELL; if (cpu_model == BROADWELL_XEON_E3) cpu_model = BROADWELL; - if (cpu_model == SKL_UY) cpu_model = SKL; - if (cpu_model == KBL_1) cpu_model = KBL; - if (cpu_model == CML) cpu_model = KBL; + if (cpu_model == CML_1) cpu_model = CML; + if (cpu_model == ICL_1) cpu_model = ICL; + if (cpu_model == TGL_1) cpu_model = TGL; if(!isCPUModelSupported((int)cpu_model)) { @@ -2159,11 +2190,13 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; std::cerr << "Usage of Linux perf events is disabled through PCM_NO_PERF environment variable. Using direct PMU programming...\n"; } +/* if(num_online_cores < num_cores) { canUsePerf = false; std::cerr << "PCM does not support using Linux perf API on systems with offlined cores. Falling-back to direct PMU programming.\n"; } +*/ else if(PERF_COUNT_HW_MAX <= PCM_PERF_COUNT_HW_REF_CPU_CYCLES) { canUsePerf = false; @@ -2328,10 +2361,8 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter } else switch ( cpu_model ) { - case SKL: + PCM_SKL_PATH_CASES case SKX: - case KBL: - case ICL: assert(useSkylakeEvents()); coreEventDesc[0].event_number = SKL_MEM_LOAD_RETIRED_L3_MISS_EVTNR; coreEventDesc[0].umask_value = SKL_MEM_LOAD_RETIRED_L3_MISS_UMASK; @@ -2468,6 +2499,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter // Version for linux/windows/freebsd/dragonflybsd for (int i = 0; i < (int)num_cores; ++i) { + if (isCoreOnline(i) == false) continue; TemporalThreadAffinity tempThreadAffinity(i, false); // speedup trick for Linux const auto status = programCoreCounters(i, mode_, pExtDesc, lastProgrammedCustomCounters[i]); @@ -2592,6 +2624,9 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, ctrl_reg.fields.reserved1 = 0; } + MSR[i]->write(INST_RETIRED_ANY_ADDR, 0); + MSR[i]->write(CPU_CLK_UNHALTED_THREAD_ADDR, 0); + MSR[i]->write(CPU_CLK_UNHALTED_REF_ADDR, 0); MSR[i]->write(IA32_CR_FIXED_CTR_CTRL, ctrl_reg.value); } @@ -2669,6 +2704,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, value |= (1ULL << j); // enable all custom counters (if > 4) } + MSR[i]->write(IA32_PERF_GLOBAL_OVF_CTRL, value); MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, value); } return PCM::Success; @@ -3225,10 +3261,18 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Broadwell"; case SKL: return "Skylake"; + case SKL_UY: + return "Skylake U/Y"; case KBL: return "Kabylake"; + case KBL_1: + return "Kabylake/Whiskey Lake"; + case CML: + return "Comet Lake"; case ICL: return "Icelake"; + case TGL: + return "Tiger Lake"; case SKX: if (cpu_model_param >= 0) { @@ -3344,11 +3388,18 @@ void PCM::resetPMU() std::cerr << " Zeroed PMU registers\n"; #endif } -void PCM::freeRMID() +void PCM::cleanupRDT() { if(!(QOSMetricAvailable() && L3QOSMetricAvailable())) { return; } +#ifdef __linux__ + if (useResctrl) + { + resctrl.cleanup(); + return; + } +#endif for(int32 core = 0; core < num_cores; core ++ ) { @@ -3410,7 +3461,7 @@ void PCM::cleanup() disableForceRTMAbortMode(); cleanupUncorePMUs(); - freeRMID(); + cleanupRDT(); #ifdef __linux__ if (needToRestoreNMIWatchdog) { @@ -3652,6 +3703,7 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) PCM * m = PCM::getInstance(); const int32 core_gen_counter_num_max = m->getMaxCustomCoreEvents(); + uint64 overflows = 0; const auto corruptedCountersMask = m->checkCustomCoreProgramming(msr); // reading core PMU counters @@ -3671,13 +3723,27 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) else #endif { - msr->read(INST_RETIRED_ANY_ADDR, &cInstRetiredAny); - msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread); - msr->read(CPU_CLK_UNHALTED_REF_ADDR, &cCpuClkUnhaltedRef); - for (int i = 0; i < core_gen_counter_num_max; ++i) + uint64 overflows_after = 0; + + do { - msr->read(IA32_PMC0 + i, &cCustomEvents[i]); - } + msr->read(IA32_PERF_GLOBAL_STATUS, &overflows); // read overflows + // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl; + + msr->read(INST_RETIRED_ANY_ADDR, &cInstRetiredAny); + msr->read(CPU_CLK_UNHALTED_THREAD_ADDR, &cCpuClkUnhaltedThread); + msr->read(CPU_CLK_UNHALTED_REF_ADDR, &cCpuClkUnhaltedRef); + for (int i = 0; i < core_gen_counter_num_max; ++i) + { + msr->read(IA32_PMC0 + i, &cCustomEvents[i]); + } + + msr->read(IA32_PERF_GLOBAL_STATUS, &overflows_after); // read overflows again + // std::cerr << "Debug " << core_id << " IA32_PERF_GLOBAL_STATUS: " << overflows << std::endl; + + } while (overflows != overflows_after); // repeat the reading if an overflow happened during the reading + + msr->write(IA32_PERF_GLOBAL_OVF_CTRL, overflows); // clear overflows } for (int i = 0; i < core_gen_counter_num_max; ++i) @@ -3686,12 +3752,12 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) } // std::cout << "DEBUG1: " << msr->getCoreId() << " " << cInstRetiredAny << " \n"; - if(m->L3CacheOccupancyMetricAvailable()) + if (m->L3CacheOccupancyMetricAvailable() && m->useResctrl == false) { msr->lock(); uint64 event = 1; m->initQOSevent(event, core_id); - msr->read(IA32_QM_CTR,&cL3Occupancy); + msr->read(IA32_QM_CTR, &cL3Occupancy); //std::cout << "readAndAggregate reading IA32_QM_CTR " << std::dec << cL3Occupancy << std::dec << "\n"; msr->unlock(); } @@ -3710,16 +3776,25 @@ void BasicCounterState::readAndAggregate(std::shared_ptr msr) msr->read(MSR_SMI_COUNT, &cSMICount); - InstRetiredAny += m->extractCoreFixedCounterValue(cInstRetiredAny); - CpuClkUnhaltedThread += m->extractCoreFixedCounterValue(cCpuClkUnhaltedThread); - CpuClkUnhaltedRef += m->extractCoreFixedCounterValue(cCpuClkUnhaltedRef); + InstRetiredAny += checked_uint64(m->extractCoreFixedCounterValue(cInstRetiredAny), extract_bits(overflows, 32, 32)); + CpuClkUnhaltedThread += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedThread), extract_bits(overflows, 33, 33)); + CpuClkUnhaltedRef += checked_uint64(m->extractCoreFixedCounterValue(cCpuClkUnhaltedRef), extract_bits(overflows, 34, 34)); for (int i = 0; i < core_gen_counter_num_max; ++i) { - Event(i) += m->extractCoreGenCounterValue(cCustomEvents[i]); + Event[i] += checked_uint64(m->extractCoreGenCounterValue(cCustomEvents[i]), extract_bits(overflows, i, i)); + } +#ifdef __linux__ + if (m->useResctrl) + { + L3Occupancy = m->resctrl.getL3OCC(core_id) / 1024; + } + else +#endif + { + //std::cout << "Scaling Factor " << m->L3ScalingFactor; + cL3Occupancy = m->extractQOSMonitoring(cL3Occupancy); + L3Occupancy = (cL3Occupancy==PCM_INVALID_QOS_MONITORING_DATA)? PCM_INVALID_QOS_MONITORING_DATA : (uint64)((double)(cL3Occupancy * m->L3ScalingFactor) / 1024.0); } - //std::cout << "Scaling Factor " << m->L3ScalingFactor; - cL3Occupancy = m->extractQOSMonitoring(cL3Occupancy); - L3Occupancy = (cL3Occupancy==PCM_INVALID_QOS_MONITORING_DATA)? PCM_INVALID_QOS_MONITORING_DATA : (uint64)((double)(cL3Occupancy * m->L3ScalingFactor) / 1024.0); for(int i=0; i <= int(PCM::MAX_C_STATE);++i) CStateResidency[i] += cCStateResidency[i]; ThermalHeadroom = extractThermalHeadroom(thermStatus); @@ -3933,7 +4008,7 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& allPMUConfigs_) return PCM::UnknownError; } size_t c = 0; - for (; c < corePMUConfig.programmable.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_COUNTERS; ++c) + for (; c < corePMUConfig.programmable.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_CUSTOM_COUNTERS; ++c) { regs[c].value = corePMUConfig.programmable[c].first[0]; } @@ -4129,6 +4204,20 @@ SystemCounterState PCM::getSystemCounterState() template void PCM::readAndAggregateMemoryBWCounters(const uint32 core, CounterStateType & result) { +#ifdef __linux__ + if (useResctrl) + { + if (CoreLocalMemoryBWMetricAvailable()) + { + result.MemoryBWLocal += resctrl.getMBL(core) / (1024*1024); + } + if (CoreRemoteMemoryBWMetricAvailable()) + { + result.MemoryBWTotal += resctrl.getMBT(core) / (1024*1024); + } + return; + } +#endif uint64 cMemoryBWLocal = 0; uint64 cMemoryBWTotal = 0; @@ -4883,32 +4972,36 @@ bool PCM::isSecureBoot() const bool PCM::useLinuxPerfForUncore() const { - static bool printed = false; + static int use = -1; + if (use != -1) + { + return 1 == use; + } + use = 0; bool secureBoot = isSecureBoot(); #ifdef PCM_USE_PERF + const auto imcIDs = enumeratePerfPMUs("imc", 100); + std::cout << "INFO: Linux perf interface to program uncore PMUs is " << (imcIDs.empty()?"NOT ":"") << "present\n"; const char * perf_env = std::getenv("PCM_USE_UNCORE_PERF"); if (perf_env != NULL && std::string(perf_env) == std::string("1")) { - if (!printed) std::cout << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n"; - printed = true; - return true; + std::cout << "INFO: using Linux perf interface to program uncore PMUs because env variable PCM_USE_UNCORE_PERF=1\n"; + use = 1; } if (secureBoot) { - if (!printed) std::cout << "Secure Boot detected. Using Linux perf for uncore PMU programming.\n"; - printed = true; - return true; + std::cout << "INFO: Secure Boot detected. Using Linux perf for uncore PMU programming.\n"; + use = 1; } else #endif { if (secureBoot) { - if (!printed) std::cerr << "ERROR: Secure Boot detected. Recompile PCM with -DPCM_USE_PERF or disable Secure Boot.\n"; - printed = true; + std::cerr << "ERROR: Secure Boot detected. Recompile PCM with -DPCM_USE_PERF or disable Secure Boot.\n"; } } - return false; + return 1 == use; } ServerPCICFGUncore::ServerPCICFGUncore(uint32 socket_, const PCM * pcm) : @@ -6597,7 +6690,7 @@ void PCM::programIIOCounters(IIOPMUCNTCTLRegister rawEvents[4], int IIOStack) else IIO_units.push_back(IIOStack); - for (int32 i = 0; (i < num_sockets) && MSR.size(); ++i) + for (int32 i = 0; (i < num_sockets) && MSR.size() && iioPMUs.size(); ++i) { uint32 refCore = socketRefCore[i]; TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux @@ -6673,9 +6766,9 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc if((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) *cboPMUs[i][cbo].filter[0] = llc_lookup_tid_filter; - PCM::program(cboPMUs[i][cbo], events, events + 4, UNC_PMON_UNIT_CTL_FRZ_EN); + PCM::program(cboPMUs[i][cbo], events, events + ServerUncoreCounterState::maxCounters, UNC_PMON_UNIT_CTL_FRZ_EN); - for (int c = 0; c < 4; ++c) + for (int c = 0; c < ServerUncoreCounterState::maxCounters; ++c) { *cboPMUs[i][cbo].counterValue[c] = 0; } @@ -6788,7 +6881,8 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) void PCM::programCbo() { - uint64 events[4] = {0, 0, 0, 0}; + uint64 events[ServerUncoreCounterState::maxCounters]; + std::fill(events, events + ServerUncoreCounterState::maxCounters, 0); uint32 opCode = 0; initLLCReadMissLatencyEvents(events, opCode); diff --git a/cpucounters.h b/cpucounters.h index b2a6827e..32f4fadd 100644 --- a/cpucounters.h +++ b/cpucounters.h @@ -67,6 +67,8 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #endif #endif +#include "resctrl.h" + namespace pcm { #ifdef _MSC_VER @@ -530,6 +532,8 @@ class PCM_API PCM friend class Aggregator; friend class ServerPCICFGUncore; PCM(); // forbidden to call directly because it is a singleton + PCM(const PCM &) = delete; + PCM & operator = (const PCM &) = delete; int32 cpu_family; int32 cpu_model; @@ -580,6 +584,10 @@ class PCM_API PCM std::vector > memory_bw_local; std::vector > memory_bw_total; +#ifdef __linux__ + Resctrl resctrl; +#endif + bool useResctrl; std::shared_ptr clientBW; std::shared_ptr clientImcReads; @@ -810,7 +818,7 @@ class PCM_API PCM bool PMUinUse(); void cleanupPMU(); - void freeRMID(); + void cleanupRDT(); bool decrementInstanceSemaphore(); // returns true if it was the last instance #ifdef __APPLE__ @@ -824,7 +832,7 @@ class PCM_API PCM void computeQPISpeedBeckton(int core_nr); void destroyMSR(); void computeNominalFrequency(); - static bool isCPUModelSupported(int model_); + static bool isCPUModelSupported(const int model_); std::string getSupportedUarchCodenames() const; std::string getUnsupportedMessage() const; bool detectModel(); @@ -843,10 +851,12 @@ class PCM_API PCM * * \returns nothing */ - void initRMID(); + void initRDT(); /*! - * \brief initializes each core event MSR with an RMID for QOS event (L3 cache monitoring or memory bandwidth monitoring) + * \brief Initializes RDT * + * Initializes RDT infrastructure through resctrl Linux driver or direct MSR programming. + * For the latter: initializes each core event MSR with an RMID for QOS event (L3 cache monitoring or memory bandwidth monitoring) * \returns nothing */ void initQOSevent(const uint64 event, const int32 core); @@ -1244,7 +1254,11 @@ class PCM_API PCM KBL = 158, KBL_1 = 142, CML = 166, + CML_1 = 165, ICL = 126, + ICL_1 = 125, + TGL = 140, + TGL_1 = 141, BDX = 79, KNL = 87, SKL = 94, @@ -1252,6 +1266,27 @@ class PCM_API PCM END_OF_MODEL_LIST = 0x0ffff }; +#define PCM_SKL_PATH_CASES \ + case PCM::SKL_UY: \ + case PCM::KBL: \ + case PCM::KBL_1: \ + case PCM::CML: \ + case PCM::ICL: \ + case PCM::TGL: \ + case PCM::SKL: + +private: + bool useSKLPath() const + { + switch (cpu_model) + { + PCM_SKL_PATH_CASES + return true; + } + return false; + } +public: + //! \brief Reads CPU model id //! \return CPU model ID uint32 getCPUModel() const { return (uint32)cpu_model; } @@ -1398,6 +1433,7 @@ class PCM_API PCM //! \return max number of instructions per cycle uint32 getMaxIPC() const { + if (ICL == cpu_model || TGL == cpu_model) return 5; switch (cpu_model) { case DENVERTON: @@ -1416,9 +1452,7 @@ class PCM_API PCM case BROADWELL: case BDX_DE: case BDX: - case SKL: - case KBL: - case ICL: + PCM_SKL_PATH_CASES case SKX: return 4; case KNL: @@ -1670,9 +1704,7 @@ class PCM_API PCM || cpu_model == PCM::BDX_DE || cpu_model == PCM::BDX || cpu_model == PCM::KNL - || cpu_model == PCM::SKL - || cpu_model == PCM::KBL - || cpu_model == PCM::ICL + || useSKLPath() || cpu_model == PCM::SKX ); } @@ -1754,9 +1786,7 @@ class PCM_API PCM || cpu_model == PCM::IVY_BRIDGE || cpu_model == PCM::HASWELL || cpu_model == PCM::BROADWELL - || cpu_model == PCM::SKL - || cpu_model == PCM::KBL - || cpu_model == PCM::ICL + || useSKLPath() ); } @@ -1773,7 +1803,7 @@ class PCM_API PCM cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX || cpu_model == PCM::SKX - || cpu_model == PCM::SKL + || useSKLPath() ); } @@ -1862,9 +1892,7 @@ class PCM_API PCM bool useSkylakeEvents() const { - return PCM::SKL == cpu_model - || PCM::KBL == cpu_model - || PCM::ICL == cpu_model + return useSKLPath() || PCM::SKX == cpu_model ; } @@ -1875,9 +1903,7 @@ class PCM_API PCM || cpu_model == IVY_BRIDGE || cpu_model == HASWELL || cpu_model == BROADWELL - || cpu_model == SKL - || cpu_model == KBL - || cpu_model == ICL + || useSKLPath() ; } @@ -2031,35 +2057,21 @@ class BasicCounterState friend uint64 getSMICount(const CounterStateType & before, const CounterStateType & after); protected: - uint64 InstRetiredAny; - uint64 CpuClkUnhaltedThread; - uint64 CpuClkUnhaltedRef; - // dont put any additional fields between Event 0-Event 3 because getNumberOfCustomEvents assumes there are none - union { - uint64 L3Miss; - uint64 Event0; - uint64 ArchLLCMiss; - }; - union { - uint64 L3UnsharedHit; - uint64 Event1; - uint64 ArchLLCRef; - uint64 SKLL3Hit; - }; - union { - uint64 L2HitM; - uint64 Event2; - uint64 SKLL2Miss; - }; - union { - uint64 L2Hit; - uint64 Event3; + checked_uint64 InstRetiredAny; + checked_uint64 CpuClkUnhaltedThread; + checked_uint64 CpuClkUnhaltedRef; + checked_uint64 Event[PERF_MAX_CUSTOM_COUNTERS]; + enum + { + L3MissPos = 0, + ArchLLCMissPos = 0, + L3UnsharedHitPos = 1, + ArchLLCRefPos = 1, + SKLL3HitPos = 1, + L2HitMPos = 2, + SKLL2MissPos = 2, + L2HitPos = 3 }; - uint64 Event4, Event5, Event6, Event7; - uint64* getEventsPtr() { return &Event0; }; - const uint64* getEventsPtr() const { return &Event0; }; - uint64& Event(size_t i) { return getEventsPtr()[i]; }; - const uint64& Event(size_t i) const { return getEventsPtr()[i]; }; uint64 InvariantTSC; // invariant time stamp counter uint64 CStateResidency[PCM::MAX_C_STATE + 1]; int32 ThermalHeadroom; @@ -2070,9 +2082,6 @@ class BasicCounterState public: BasicCounterState() : - InstRetiredAny(0), - CpuClkUnhaltedThread(0), - CpuClkUnhaltedRef(0), InvariantTSC(0), ThermalHeadroom(PCM_INVALID_THERMAL_HEADROOM), L3Occupancy(0), @@ -2081,7 +2090,6 @@ class BasicCounterState SMICount(0) { memset(CStateResidency, 0, sizeof(CStateResidency)); - memset(getEventsPtr(), 0, sizeof(uint64) * PERF_MAX_CUSTOM_COUNTERS); } virtual ~BasicCounterState() { } @@ -2096,7 +2104,7 @@ class BasicCounterState CpuClkUnhaltedRef += o.CpuClkUnhaltedRef; for (int i = 0; i < PERF_MAX_CUSTOM_COUNTERS; ++i) { - Event(i) += o.Event(i); + Event[i] += o.Event[i]; } InvariantTSC += o.InvariantTSC; for (int i = 0; i <= (int)PCM::MAX_C_STATE; ++i) @@ -2810,7 +2818,7 @@ double getExecUsage(const CounterStateType & before, const CounterStateType & af template uint64 getInstructionsRetired(const CounterStateType & now) // instructions { - return now.InstRetiredAny; + return now.InstRetiredAny.getRawData_NoOverflowProtection(); } /*! \brief Computes the number core clock cycles when signal on a specific core is running (not halted) @@ -2862,7 +2870,7 @@ uint64 getRefCycles(const CounterStateType & before, const CounterStateType & af template uint64 getCycles(const CounterStateType & now) // clocks { - return now.CpuClkUnhaltedThread; + return now.CpuClkUnhaltedThread.getRawData_NoOverflowProtection(); } /*! \brief Computes average number of retired instructions per core cycle for the entire system combining instruction counts from logical cores to corresponding physical cores @@ -3010,7 +3018,7 @@ template uint64 getL3CacheMisses(const CounterStateType & before, const CounterStateType & after) { if (!PCM::getInstance()->isL3CacheMissesAvailable()) return 0; - return after.L3Miss - before.L3Miss; + return after.Event[BasicCounterState::L3MissPos] - before.Event[BasicCounterState::L3MissPos]; } /*! \brief Computes number of L2 cache misses @@ -3026,15 +3034,15 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType auto pcm = PCM::getInstance(); if (pcm->isL2CacheMissesAvailable() == false) return 0ULL; if (pcm->useSkylakeEvents()) { - return after.SKLL2Miss - before.SKLL2Miss; + return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL) { - return after.ArchLLCMiss - before.ArchLLCMiss; + return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; } - uint64 L3Miss = after.L3Miss - before.L3Miss; - uint64 L3UnsharedHit = after.L3UnsharedHit - before.L3UnsharedHit; - uint64 L2HitM = after.L2HitM - before.L2HitM; + uint64 L3Miss = after.Event[BasicCounterState::L3MissPos] - before.Event[BasicCounterState::L3MissPos]; + uint64 L3UnsharedHit = after.Event[BasicCounterState::L3UnsharedHitPos] - before.Event[BasicCounterState::L3UnsharedHitPos]; + uint64 L2HitM = after.Event[BasicCounterState::L2HitMPos] - before.Event[BasicCounterState::L2HitMPos]; return L2HitM + L3UnsharedHit + L3Miss; } @@ -3052,11 +3060,11 @@ uint64 getL2CacheHits(const CounterStateType & before, const CounterStateType & if (pcm->isL2CacheHitsAvailable() == false) return 0ULL; if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL) { - uint64 L2Miss = after.ArchLLCMiss - before.ArchLLCMiss; - uint64 L2Ref = after.ArchLLCRef - before.ArchLLCRef; + uint64 L2Miss = after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; + uint64 L2Ref = after.Event[BasicCounterState::ArchLLCRefPos] - before.Event[BasicCounterState::ArchLLCRefPos]; return L2Ref - L2Miss; } - return after.L2Hit - before.L2Hit; + return after.Event[BasicCounterState::L2HitPos] - before.Event[BasicCounterState::L2HitPos]; } /*! \brief Computes L3 Cache Occupancy @@ -3104,7 +3112,7 @@ template uint64 getL3CacheHitsNoSnoop(const CounterStateType & before, const CounterStateType & after) { if (!PCM::getInstance()->isL3CacheHitsNoSnoopAvailable()) return 0; - return after.L3UnsharedHit - before.L3UnsharedHit; + return after.Event[BasicCounterState::L3UnsharedHitPos] - before.Event[BasicCounterState::L3UnsharedHitPos]; } /*! \brief Computes number of L3 cache hits where snooping in sibling L2 caches had to be done @@ -3119,9 +3127,9 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy { if (!PCM::getInstance()->isL3CacheHitsSnoopAvailable()) return 0; if (PCM::getInstance()->useSkylakeEvents()) { - return after.SKLL3Hit - before.SKLL3Hit; + return after.Event[BasicCounterState::SKLL3HitPos] - before.Event[BasicCounterState::SKLL3HitPos]; } - return after.L2HitM - before.L2HitM; + return after.Event[BasicCounterState::L2HitMPos] - before.Event[BasicCounterState::L2HitMPos]; } @@ -3192,7 +3200,7 @@ inline double getCoreCStateResidency(int state, const CounterStateType & before, template inline uint64 getCoreCStateResidency(int state, const CounterStateType& now) { - if (state == 0) return now.CpuClkUnhaltedRef; + if (state == 0) return now.CpuClkUnhaltedRef.getRawData_NoOverflowProtection(); return now.BasicCounterState::CStateResidency[state]; } @@ -3359,7 +3367,7 @@ uint64 getSMICount(const CounterStateType & before, const CounterStateType & aft template uint64 getNumberOfCustomEvents(int32 eventCounterNr, const CounterStateType & before, const CounterStateType & after) { - return after.Event(eventCounterNr) - before.Event(eventCounterNr); + return after.Event[eventCounterNr] - before.Event[eventCounterNr]; } /*! \brief Get estimation of QPI data traffic per incoming QPI link diff --git a/daemon/client/Debug/makefile b/daemon/client/Debug/makefile index c4537f5c..9d5a0f6f 100644 --- a/daemon/client/Debug/makefile +++ b/daemon/client/Debug/makefile @@ -42,8 +42,8 @@ all: client # Tool invocations client: $(OBJS) $(USER_OBJS) @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - g++ -o "client" $(OBJS) $(USER_OBJS) $(LIBS) + @echo 'Invoking: C++ Linker' + $(CXX) -o "client" $(OBJS) $(USER_OBJS) $(LIBS) @echo 'Finished building target: $@' @echo ' ' diff --git a/daemon/client/Debug/subdir.mk b/daemon/client/Debug/subdir.mk index 70765bc7..de0ec832 100644 --- a/daemon/client/Debug/subdir.mk +++ b/daemon/client/Debug/subdir.mk @@ -19,8 +19,8 @@ CPP_DEPS += \ # Each subdirectory must supply rules for building sources it contributes %.o: ../%.cpp @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" + @echo 'Invoking: C++ Compiler' + $(CXX) -std=c++11 -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' diff --git a/daemon/client/client.cpp b/daemon/client/client.cpp index def97e2a..a9d956de 100644 --- a/daemon/client/client.cpp +++ b/daemon/client/client.cpp @@ -106,7 +106,7 @@ namespace PCMDaemon { { int sharedMemoryId; FILE *fp = fopen (shmIdLocation_.c_str(), "r"); - if (fp <= 0) + if (!fp) { std::cerr << "Failed to open to shared memory key location: " << shmIdLocation_ << "\n"; exit(EXIT_FAILURE); diff --git a/daemon/client/client.h b/daemon/client/client.h index df8479b1..43c5ede0 100644 --- a/daemon/client/client.h +++ b/daemon/client/client.h @@ -38,7 +38,7 @@ namespace PCMDaemon { int pollIntervalMs_; std::string shmIdLocation_; bool shmAttached_; - PCMDaemon::SharedPCMState* sharedPCMState_; + PCMDaemon::SharedPCMState* sharedPCMState_ = nullptr; PCMDaemon::uint64 lastUpdatedClientTsc_; }; diff --git a/daemon/daemon/Debug/makefile b/daemon/daemon/Debug/makefile index a2e72f3c..ae0de046 100644 --- a/daemon/daemon/Debug/makefile +++ b/daemon/daemon/Debug/makefile @@ -47,8 +47,8 @@ endif # Tool invocations daemon: pre-build $(OBJS) @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - g++ -o "daemon" $(OBJS) $(USER_OBJS) $(LIBS) + @echo 'Invoking: C++ Linker' + $(CXX) -o "daemon" $(OBJS) $(USER_OBJS) $(LIBS) @echo 'Finished building target: $@' @echo ' ' @@ -61,7 +61,7 @@ pre-build: $(USER_OBJS) %.o: ../../../%.cpp -@echo 'Build PCM' - -g++ ${CXXFLAGS} -c $< -o $@ -std=c++11 + -$(CXX) ${CXXFLAGS} -c $< -o $@ -std=c++11 -@echo ' ' .PHONY: all clean dependents diff --git a/daemon/daemon/Debug/objects.mk b/daemon/daemon/Debug/objects.mk index d69bdddb..1f1b252b 100644 --- a/daemon/daemon/Debug/objects.mk +++ b/daemon/daemon/Debug/objects.mk @@ -2,7 +2,7 @@ # Automatically-generated file. Do not edit! ################################################################################ -USER_OBJS := ./utils.o ./pci.o ./msr.o ./mmio.o ./client_bw.o ./cpucounters.o ./topology.o ./debug.o ./threadpool.o +USER_OBJS := ./utils.o ./pci.o ./msr.o ./mmio.o ./client_bw.o ./cpucounters.o ./topology.o ./debug.o ./threadpool.o ./resctrl.o LIBS := -lpthread -lrt diff --git a/daemon/daemon/Debug/subdir.mk b/daemon/daemon/Debug/subdir.mk index cbb149de..e5eccb4d 100644 --- a/daemon/daemon/Debug/subdir.mk +++ b/daemon/daemon/Debug/subdir.mk @@ -21,8 +21,8 @@ CPP_DEPS += \ # Each subdirectory must supply rules for building sources it contributes %.o: ../%.cpp @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O0 -g3 -Wall -c -fmessage-length=0 -Wno-unknown-pragmas -std=c++11 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" + @echo 'Invoking: C++ Compiler' + $(CXX) -O0 -g3 -Wall -c -fmessage-length=0 -Wno-unknown-pragmas -std=c++11 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' diff --git a/daemon/daemon/common.h b/daemon/daemon/common.h index b8b40e48..2554a257 100644 --- a/daemon/daemon/common.h +++ b/daemon/daemon/common.h @@ -62,30 +62,30 @@ namespace PCMDaemon { typedef struct PCMSystem PCMSystem; struct PCMCoreCounter { - uint64 coreId; - int32 socketId; - double instructionsPerCycle; - uint64 cycles; - uint64 instructionsRetired; - double execUsage; - double relativeFrequency; - double activeRelativeFrequency; - uint64 l3CacheMisses; - uint64 l3CacheReference; - uint64 l2CacheMisses; - double l3CacheHitRatio; - double l2CacheHitRatio; - double l3CacheMPI; - double l2CacheMPI; + uint64 coreId = 0; + int32 socketId = 0; + double instructionsPerCycle = 0.; + uint64 cycles = 0; + uint64 instructionsRetired = 0; + double execUsage = 0.; + double relativeFrequency = 0.; + double activeRelativeFrequency = 0.; + uint64 l3CacheMisses = 0; + uint64 l3CacheReference = 0; + uint64 l2CacheMisses = 0; + double l3CacheHitRatio = 0.; + double l2CacheHitRatio = 0.; + double l3CacheMPI = 0.; + double l2CacheMPI = 0.; bool l3CacheOccupancyAvailable; uint64 l3CacheOccupancy; bool localMemoryBWAvailable; uint64 localMemoryBW; bool remoteMemoryBWAvailable; uint64 remoteMemoryBW; - uint64 localMemoryAccesses; - uint64 remoteMemoryAccesses; - int32 thermalHeadroom; + uint64 localMemoryAccesses = 0; + uint64 remoteMemoryAccesses = 0; + int32 thermalHeadroom = 0; public: PCMCoreCounter() : @@ -131,7 +131,7 @@ namespace PCMDaemon { typedef struct PCMMemoryChannelCounter PCMMemoryChannelCounter; struct PCMMemorySocketCounter { - uint64 socketId; + uint64 socketId = 0; PCMMemoryChannelCounter channels[MEMORY_MAX_IMC_CHANNELS]; uint32 numOfChannels; float read; @@ -191,7 +191,7 @@ namespace PCMDaemon { typedef struct PCMQPILinkCounter PCMQPILinkCounter; struct PCMQPISocketCounter { - uint64 socketId; + uint64 socketId = 0; PCMQPILinkCounter links[QPI_MAX_LINKS]; uint64 total; diff --git a/daemon/daemon/daemon.cpp b/daemon/daemon/daemon.cpp index 9e35048e..7e4dd22d 100644 --- a/daemon/daemon/daemon.cpp +++ b/daemon/daemon/daemon.cpp @@ -341,7 +341,7 @@ namespace PCMDaemon { //Store shm id in a file (shmIdLocation_) FILE *fp = fopen (shmIdLocation_.c_str(), "w"); - if (fp < 0) + if (!fp) { std::cerr << "Failed to create/write to shared memory key location: " << shmIdLocation_ << "\n"; exit(EXIT_FAILURE); diff --git a/daemon/daemon/daemon.h b/daemon/daemon/daemon.h index 9843ee6c..124c6d9e 100644 --- a/daemon/daemon/daemon.h +++ b/daemon/daemon/daemon.h @@ -32,6 +32,8 @@ namespace PCMDaemon { Daemon(int argc, char *argv[]); ~Daemon(); int run(); + Daemon (const Daemon &) = delete; + Daemon & operator = (const Daemon &) = delete; private: void setupPCM(); void checkAccessAndProgramPCM(); diff --git a/daemon/daemon/test/Makefile b/daemon/daemon/test/Makefile index 22596a9e..ea66c6d7 100644 --- a/daemon/daemon/test/Makefile +++ b/daemon/daemon/test/Makefile @@ -1,4 +1,3 @@ -CC=g++ CFLAGS=-c -Wall -std=c++11 LDFLAGS= SOURCES=main.cpp @@ -9,11 +8,11 @@ RM=rm all: $(SOURCES) $(EXECUTABLE) $(EXECUTABLE): $(OBJECTS) - $(CC) $(LDFLAGS) $(OBJECTS) -o $@ + $(CXX) $(LDFLAGS) $(OBJECTS) -o $@ .cpp.o: - $(CC) $(CFLAGS) $< -o $@ + $(CXX) $(CFLAGS) $< -o $@ clean: -$(RM) $(OBJECTS) $(EXECUTABLE) - -@echo ' ' \ No newline at end of file + -@echo ' ' diff --git a/memoptest.cpp b/memoptest.cpp index d3517e59..8015dae2 100644 --- a/memoptest.cpp +++ b/memoptest.cpp @@ -33,8 +33,8 @@ inline double my_timestamp() struct T { - int key[1]; - int data[3]; + int key[1] = { 0 }; + int data[3] = { 0, 0, 0 }; T() { } T(int a) { key[0] = a; } @@ -79,7 +79,7 @@ void stream_write_task(Y * p, Y * e, int value) template void read_intensive_task(Y * p, Y * e, int value) { - std::find(p, e, -1); + std::find(p, e, -1); // cppcheck-suppress ignoredReturnValue } diff --git a/mmio.cpp b/mmio.cpp index 0a22ae76..a40f2413 100644 --- a/mmio.cpp +++ b/mmio.cpp @@ -53,7 +53,7 @@ class PCMPmem : public WinPmem { wcscat_s(driver_filename, MAX_PATH, L"\\winpmem_x64.sys"); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/google/rekall/raw/master/tools/pmem/resources/winpmem/winpmem_x64.sys .\n"; + std::cerr << "ERROR: winpmem_x64.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x64.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; @@ -61,7 +61,7 @@ class PCMPmem : public WinPmem { wcscat_s(driver_filename, MAX_PATH, L"\\winpmem_x86.sys"); if (GetFileAttributes(driver_filename) == INVALID_FILE_ATTRIBUTES) { - std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/google/rekall/raw/master/tools/pmem/resources/winpmem/winpmem_x86.sys .\n"; + std::cerr << "ERROR: winpmem_x86.sys not found in current directory. Download it from https://github.com/Velocidex/WinPmem/blob/master/kernel/binaries/winpmem_x86.sys .\n"; std::cerr << "ERROR: Memory bandwidth statistics will not be available.\n"; } break; @@ -204,4 +204,4 @@ MMIORange::~MMIORange() #endif -} // namespace pcm \ No newline at end of file +} // namespace pcm diff --git a/msr.cpp b/msr.cpp index e05e303f..916dbd89 100644 --- a/msr.cpp +++ b/msr.cpp @@ -39,6 +39,8 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include #endif +#include + namespace pcm { #ifdef _MSC_VER @@ -236,6 +238,11 @@ MsrHandle::~MsrHandle() int32 MsrHandle::write(uint64 msr_number, uint64 value) { +#if 0 + static std::mutex m; + std::lock_guard g(m); + std::cout << "DEBUG: writing MSR 0x" << std::hex << msr_number << " value 0x" << value << " on cpu " << std::dec << cpu_id << std::endl; +#endif return ::pwrite(fd, (const void *)&value, sizeof(uint64), msr_number); } diff --git a/msrtest.cpp b/msrtest.cpp index e4cb4555..0c50a2a7 100644 --- a/msrtest.cpp +++ b/msrtest.cpp @@ -30,7 +30,7 @@ int main() for (i = 0; i < NUM_CORES; ++i) { cpu_msr[i] = new MsrHandle(i); - assert(cpu_msr >= 0); + assert(cpu_msr[i]); FixedEventControlRegister ctrl_reg; res = cpu_msr[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value); diff --git a/pcm-memory.cpp b/pcm-memory.cpp index a6888ee0..0b38e090 100644 --- a/pcm-memory.cpp +++ b/pcm-memory.cpp @@ -45,7 +45,7 @@ using namespace std; using namespace pcm; -const uint32 max_sockets = 256; +constexpr uint32 max_sockets = 256; uint32 max_imc_channels = ServerUncoreCounterState::maxChannels; const uint32 max_edc_channels = ServerUncoreCounterState::maxChannels; const uint32 max_imc_controllers = ServerUncoreCounterState::maxControllers; @@ -336,13 +336,13 @@ void display_bandwidth(PCM *m, memdata_t *md, const uint32 no_columns, const boo \r|-- DDR4 Channel Monitoring --||-- MCDRAM Channel Monitoring --|\n\ \r|---------------------------------------||---------------------------------------|\n\ \r"; - uint32 max_channels = max_imc_channels <= max_edc_channels ? max_edc_channels : max_imc_channels; + const uint32 max_channels = (std::max)(max_edc_channels, max_imc_channels); if (show_channel_output) { float iMC_Rd, iMC_Wr, EDC_Rd, EDC_Wr; for (uint64 channel = 0; channel < max_channels; ++channel) { - if (channel <= max_imc_channels) + if (channel < max_imc_channels) { iMC_Rd = md->iMC_Rd_socket_chan[skt][channel]; iMC_Wr = md->iMC_Wr_socket_chan[skt][channel]; @@ -352,7 +352,7 @@ void display_bandwidth(PCM *m, memdata_t *md, const uint32 no_columns, const boo iMC_Rd = -1.0; iMC_Wr = -1.0; } - if (channel <= max_edc_channels) + if (channel < max_edc_channels) { EDC_Rd = md->EDC_Rd_socket_chan[skt][channel]; EDC_Wr = md->EDC_Wr_socket_chan[skt][channel]; @@ -360,7 +360,7 @@ void display_bandwidth(PCM *m, memdata_t *md, const uint32 no_columns, const boo else { EDC_Rd = -1.0; - EDC_Rd = -1.0; + EDC_Wr = -1.0; } if (iMC_Rd >= 0.0 && iMC_Wr >= 0.0 && EDC_Rd >= 0.0 && EDC_Wr >= 0.0) @@ -526,7 +526,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const << setw(8) << PMM_MM_Ratio(md, skt) << ','; }); } - if (m->getCPUModel() != PCM::KNL) + if (m->MCDRAMmemoryTrafficMetricsAvailable() == false) { if (md->PMM == false && md->PMMMixedMode == false) { @@ -640,7 +640,7 @@ void calculate_bandwidth(PCM *m, const ServerUncoreCounterState uncState1[], con md.PMM = PMM; md.PMMMixedMode = PMMMixedMode; - for(uint32 skt = 0; skt < m->getNumSockets(); ++skt) + for(uint32 skt = 0; skt < max_sockets; ++skt) { md.iMC_Rd_socket[skt] = 0.0; md.iMC_Wr_socket[skt] = 0.0; @@ -654,6 +654,10 @@ void calculate_bandwidth(PCM *m, const ServerUncoreCounterState uncState1[], con { md.M2M_NM_read_hit_rate[skt][i] = 0.; } + } + + for(uint32 skt = 0; skt < m->getNumSockets(); ++skt) + { const uint32 numChannels1 = (uint32)m->getMCChannels(skt, 0); // number of channels in the first controller auto toBW = [&elapsedTime](const uint64 nEvents) @@ -661,9 +665,8 @@ void calculate_bandwidth(PCM *m, const ServerUncoreCounterState uncState1[], con return (float)(nEvents * 64 / 1000000.0 / (elapsedTime / 1000.0)); }; - switch (m->getCPUModel()) + if (m->MCDRAMmemoryTrafficMetricsAvailable()) { - case PCM::KNL: for (uint32 channel = 0; channel < max_edc_channels; ++channel) { if (skipInactiveChannels && getEDCCounter(channel, ServerPCICFGUncore::EventPosition::READ, uncState1[skt], uncState2[skt]) == 0.0 && getEDCCounter(channel, ServerPCICFGUncore::EventPosition::WRITE, uncState1[skt], uncState2[skt]) == 0.0) @@ -679,8 +682,9 @@ void calculate_bandwidth(PCM *m, const ServerUncoreCounterState uncState1[], con md.EDC_Rd_socket[skt] += md.EDC_Rd_socket_chan[skt][channel]; md.EDC_Wr_socket[skt] += md.EDC_Wr_socket_chan[skt][channel]; } - /* fall-through */ - default: + } + + { for (uint32 channel = 0; channel < max_imc_channels; ++channel) { uint64 reads = 0, writes = 0, pmmReads = 0, pmmWrites = 0, pmmMemoryModeCleanMisses = 0, pmmMemoryModeDirtyMisses = 0; diff --git a/pcm-raw.cpp b/pcm-raw.cpp index 492bdaef..5c2db2a4 100644 --- a/pcm-raw.cpp +++ b/pcm-raw.cpp @@ -372,7 +372,7 @@ int main(int argc, char* argv[]) #endif cerr << "\n"; - cerr << " Processor Counter Monitor: Core Monitoring Utility \n"; + cerr << " Processor Counter Monitor: Raw Event Monitoring Utility \n"; cerr << "\n"; double delay = -1.0; diff --git a/pcm-sensor-server.cpp b/pcm-sensor-server.cpp index 6d80e5b8..4b8e9a6c 100644 --- a/pcm-sensor-server.cpp +++ b/pcm-sensor-server.cpp @@ -192,8 +192,7 @@ std::string read_ndctl_info( std::ofstream& logfile ) { exit(50); } std::stringstream ndctl; - int pid; - if ( (pid = fork()) == 0 ) { + if ( fork() == 0 ) { // child, writes to pipe, close read-end close( pipes[0] ); dup2( pipes[1], fileno(stdout) ); @@ -288,21 +287,21 @@ class JSONPrinter : Visitor CoreCounterState const getCoreCounter( std::shared_ptr ag, uint32 tid ) const { CoreCounterState ccs; if ( nullptr == ag.get() ) - return std::move( ccs ); + return ccs; return std::move( ag->coreCounterStates()[tid] ); } SocketCounterState const getSocketCounter( std::shared_ptr ag, uint32 sid ) const { SocketCounterState socs; if ( nullptr == ag.get() ) - return std::move( socs ); + return socs; return std::move( ag->socketCounterStates()[sid] ); } SystemCounterState getSystemCounter( std::shared_ptr ag ) const { SystemCounterState sycs; if ( nullptr == ag.get() ) - return std::move( sycs ); + return sycs; return std::move( ag->systemCounterState() ); } @@ -548,21 +547,21 @@ class PrometheusPrinter : Visitor CoreCounterState const getCoreCounter( std::shared_ptr ag, uint32 tid ) const { CoreCounterState ccs; if ( nullptr == ag.get() ) - return std::move( ccs ); + return ccs; return std::move( ag->coreCounterStates()[tid] ); } SocketCounterState const getSocketCounter( std::shared_ptr ag, uint32 sid ) const { SocketCounterState socs; if ( nullptr == ag.get() ) - return std::move( socs ); + return socs; return std::move( ag->socketCounterStates()[sid] ); } SystemCounterState getSystemCounter( std::shared_ptr ag ) const { SystemCounterState sycs; if ( nullptr == ag.get() ) - return std::move( sycs ); + return sycs; return std::move( ag->systemCounterState() ); } @@ -849,7 +848,7 @@ class basic_socketbuf : public std::basic_streambuf { #endif bytesSent= ::send( socketFD_, (void*)outputBuffer_, bytesToSend, MSG_NOSIGNAL ); if ( -1 == bytesSent ) { - strerror( errno ); + std::cerr << strerror( errno ) << "\n"; return traits_type::eof(); } #if defined (USE_SSL) @@ -1518,7 +1517,7 @@ struct URL { authority = fullURL.substr( authorityPos+2, authorityEndPos - (authorityPos + 2) ); DBG( 3, "authority: '", authority, "'" ); - size_t atPos = authority.find( '@' ); + const size_t atPos = authority.find( '@' ); bool atFound = (atPos != std::string::npos); if ( atFound ) { if ( atPos == 0 ) @@ -1547,8 +1546,7 @@ struct URL { url.hasUser_ = true; // delete user/pass including the at authority.erase( 0, atPos+1 ); - } else - atPos = 0; + } // Instead of all the logic it is easier to work on substrings @@ -2649,7 +2647,7 @@ void HTTPServer::run() { socklen_t sa_len = sizeof( struct sockaddr_in ); int retval = ::accept( serverSocket_, (struct sockaddr*)&clientAddress, &sa_len ); if ( -1 == retval ) { - ::strerror( errno ); + std::cerr << ::strerror( errno ) << "\n"; continue; } clientSocketFD = retval; @@ -2659,7 +2657,7 @@ void HTTPServer::run() { ::memset( ipbuf, 0, 16 ); char const * resbuf = ::inet_ntop( AF_INET, &(clientAddress.sin_addr), ipbuf, INET_ADDRSTRLEN ); if ( nullptr == resbuf ) { - ::strerror( errno ); + std::cerr << ::strerror( errno ) << "\n"; ::close( clientSocketFD ); continue; } @@ -2718,7 +2716,7 @@ class HTTPSServer : public HTTPServer { } private: - SSL_CTX* sslCTX_; + SSL_CTX* sslCTX_ = nullptr; std::string certificateFile_; std::string privateKeyFile_; }; @@ -2736,7 +2734,7 @@ void HTTPSServer::run() { socklen_t sa_len = sizeof( struct sockaddr_in ); int retval = ::accept( serverSocket_, (struct sockaddr*)&clientAddress, &sa_len ); if ( -1 == retval ) { - ::strerror( errno ); + std::cerr << strerror( errno ) << "\n"; continue; } clientSocketFD = retval; @@ -2754,7 +2752,7 @@ void HTTPSServer::run() { memset( ipbuf, 0, 16 ); char const * resbuf = ::inet_ntop( AF_INET, &(clientAddress.sin_addr), ipbuf, INET_ADDRSTRLEN ); if ( nullptr == resbuf ) { - ::strerror( errno ); + std::cerr << strerror( errno ) << "\n"; ::close( clientSocketFD ); continue; } diff --git a/pmu-query.py b/pmu-query.py index 00276387..03937dc8 100755 --- a/pmu-query.py +++ b/pmu-query.py @@ -42,7 +42,7 @@ if platform.system() == 'CYGWIN_NT-6.1': p = subprocess.Popen(['./pcm-core.exe -c'],stdout=subprocess.PIPE,shell=True) elif platform.system() == 'Windows': - p = subprocess.Popen(['pcm-core.exe -c'],stdout=subprocess.PIPE,shell=True) + p = subprocess.Popen(['pcm-core.exe', '-c'],stdout=subprocess.PIPE,shell=True) else: p = subprocess.Popen(['./pcm-core.x -c'],stdout=subprocess.PIPE,shell=True) diff --git a/readmem.cpp b/readmem.cpp index aa20f458..2f15caab 100644 --- a/readmem.cpp +++ b/readmem.cpp @@ -31,8 +31,8 @@ inline double my_timestamp() struct T { - int key[1]; - int data[15]; + int key[1] = { 0 }; + int data[15] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; T() { } T(int a) { key[0] = a; } @@ -46,7 +46,7 @@ struct T template void Memory_intensive_task(DS & ds) { - std::find(ds.begin(), ds.end(), ds.size()); + std::find(ds.begin(), ds.end(), ds.size()); // cppcheck-suppress ignoredReturnValue } diff --git a/realtime.cpp b/realtime.cpp index 477d7aa6..40c0dc86 100644 --- a/realtime.cpp +++ b/realtime.cpp @@ -79,10 +79,10 @@ template void measure(DS & ds, size_t repeat, size_t nelements) { SystemCounterState before_sstate, after_sstate; - double before_ts, after_ts; + double before_ts = 0.0, after_ts; // warm up - std::find(ds.begin(), ds.end(), nelements); + std::find(ds.begin(), ds.end(), nelements); // cppcheck-suppress ignoredReturnValue double before1_ts; #if 0 @@ -99,7 +99,7 @@ void measure(DS & ds, size_t repeat, size_t nelements) } #endif - for (int j = 0; j < repeat; ++j) std::find(ds.begin(), ds.end(), nelements); + for (int j = 0; j < repeat; ++j) std::find(ds.begin(), ds.end(), nelements); // cppcheck-suppress ignoredReturnValue // stop measuring after_sstate = getSystemCounterState(); @@ -143,8 +143,8 @@ typedef int T; struct T { - int key[1]; - int data[15]; + int key[1] = { 0 }; + int data[15] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };; T() { } T(int a) { key[0] = a; } diff --git a/resctrl.cpp b/resctrl.cpp new file mode 100644 index 00000000..358e8e3a --- /dev/null +++ b/resctrl.cpp @@ -0,0 +1,146 @@ +/* +Copyright (c) 2020, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +// written by Roman Dementiev + +#ifdef __linux__ + +#include "resctrl.h" +#include "cpucounters.h" +#include +#include +#include +#include +#include + +namespace pcm +{ + bool Resctrl::isMounted() + { + struct stat st; + if (stat("/sys/fs/resctrl/mon_groups", &st) < 0) + { + return false; + } + return true; + } + void Resctrl::init() + { + if (isMounted() == false) + { + std::cerr << "ERROR: /sys/fs/resctrl is not mounted\n"; + std::cerr << "ERROR: RDT metrics (L3OCC,LMB,RMB) will not be available\n"; + std::cerr << "Mount it to make it work: mount -t resctrl resctrl /sys/fs/resctrl\n"; + return; + } + const auto numCores = pcm.getNumCores(); + for (unsigned int c = 0; c < numCores; ++c) + { + if (pcm.isCoreOnline(c)) + { + const auto C = std::to_string(c); + const auto dir = std::string(PCMPath) + C; + struct stat st; + if (stat(dir.c_str(), &st) < 0 && mkdir(dir.c_str(), 0700) < 0) + { + std::cerr << "INFO: can't create directory " << dir << " error: " << strerror(errno) << "\n"; + const auto containerDir = std::string("/pcm") + dir; + if (stat(containerDir.c_str(), &st) < 0 && mkdir(containerDir.c_str(), 0700) < 0) + { + std::cerr << "INFO: can't create directory " << containerDir << " error: " << strerror(errno) << "\n"; + std::cerr << "ERROR: RDT metrics (L3OCC,LMB,RMB) will not be available\n"; + break; + } + } + const auto cpus_listFilename = dir + "/cpus_list"; + writeSysFS(cpus_listFilename.c_str(), C, false); + auto generateMetricFiles = [&dir, c] (PCM & pcm, const std::string & metric, FileMapType & fileMap) + { + auto getMetricFilename = [] (const std::string & dir, const uint64 s, const std::string & metric) + { + std::ostringstream ostr; + ostr << dir << "/mon_data/mon_L3_" << std::setfill('0') << std::setw(2) << s << "/" << metric; + return ostr.str(); + }; + for (uint64 s = 0; s < pcm.getNumSockets(); ++s) + { + fileMap[c].push_back(getMetricFilename(dir, s, metric)); + } + }; + if (pcm.L3CacheOccupancyMetricAvailable()) + { + generateMetricFiles(pcm, "llc_occupancy", L3OCC); + } + if (pcm.CoreLocalMemoryBWMetricAvailable()) + { + generateMetricFiles(pcm, "mbm_local_bytes", MBL); + } + if (pcm.CoreRemoteMemoryBWMetricAvailable()) + { + generateMetricFiles(pcm, "mbm_total_bytes", MBT); + } + } + } + } + void Resctrl::cleanup() + { + const auto numCores = pcm.getNumCores(); + for (unsigned int c = 0; c < numCores; ++c) + { + if (pcm.isCoreOnline(c)) + { + const auto dir = std::string(PCMPath) + std::to_string(c); + rmdir(dir.c_str()); + const auto containerDir = std::string("/pcm") + dir; + rmdir(containerDir.c_str()); + } + } + } + size_t Resctrl::getMetric(Resctrl::FileMapType & fileMap, int core) + { + auto files = fileMap[core]; + size_t result = 0; + for (auto f : files) + { + const auto data = readSysFS(f.c_str(), false); + if (data.empty() == false) + { + result += atoll(data.c_str()); + } + else + { + static std::mutex lock; + std::lock_guard _(lock); + std::cerr << "Error reading " << f << ". Error: " << strerror(errno) << "\n"; + if (errno == 24) + { + std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n"; + } + } + } + return result; + } + size_t Resctrl::getL3OCC(int core) + { + return getMetric(L3OCC, core); + } + size_t Resctrl::getMBL(int core) + { + return getMetric(MBL, core); + } + size_t Resctrl::getMBT(int core) + { + return getMetric(MBT, core); + } +}; + + #endif // __linux__ \ No newline at end of file diff --git a/resctrl.h b/resctrl.h new file mode 100644 index 00000000..d4a0c886 --- /dev/null +++ b/resctrl.h @@ -0,0 +1,53 @@ +/* +Copyright (c) 2020, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +// written by Roman Dementiev + +#pragma once + +/*! \file resctrl.h + \brief interface to MBM and CMT using Linux resctrl + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include + +namespace pcm +{ + class PCM; + + class Resctrl + { + PCM & pcm; + typedef std::unordered_map > FileMapType; + FileMapType L3OCC, MBL, MBT; + Resctrl() = delete; + size_t getMetric(FileMapType & fileMap, int core); + static constexpr auto PCMPath = "/sys/fs/resctrl/mon_groups/pcm"; + public: + Resctrl(PCM & m) : pcm(m) {} + bool isMounted(); + void init(); + size_t getL3OCC(int core); + size_t getMBL(int core); + size_t getMBT(int core); + void cleanup(); + }; +}; + + #endif // __linux__ \ No newline at end of file diff --git a/topology.h b/topology.h index 3b73d9ce..ed91f820 100644 --- a/topology.h +++ b/topology.h @@ -286,6 +286,8 @@ class ClientUncore : public Uncore }; class Socket : public SystemObject { + Socket(const Socket &) = delete; + Socket & operator = (const Socket &) = delete; public: Socket( PCM* m, int32 apicID, int32 logicalID ); virtual ~Socket() { diff --git a/types.h b/types.h index 16fc3a9e..073a8308 100644 --- a/types.h +++ b/types.h @@ -27,6 +27,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include #include #include +#include #ifdef _MSC_VER #include @@ -56,6 +57,9 @@ typedef signed int int32; #define IA32_PERFEVTSEL2_ADDR (IA32_PERFEVTSEL0_ADDR + 2) #define IA32_PERFEVTSEL3_ADDR (IA32_PERFEVTSEL0_ADDR + 3) +constexpr auto IA32_PERF_GLOBAL_STATUS = 0x38E; +constexpr auto IA32_PERF_GLOBAL_OVF_CTRL = 0x390; + #define PERF_MAX_FIXED_COUNTERS (3) #define PERF_MAX_CUSTOM_COUNTERS (8) #define PERF_MAX_COUNTERS (PERF_MAX_FIXED_COUNTERS + PERF_MAX_CUSTOM_COUNTERS) @@ -1044,6 +1048,10 @@ struct MCFGRecord unsigned char startBusNumber; unsigned char endBusNumber; char reserved[4]; + MCFGRecord() + { + memset(this, 0, sizeof(MCFGRecord)); + } void print() { std::cout << "BaseAddress=" << (std::hex) << "0x" << baseAddress << " PCISegmentGroupNumber=0x" << PCISegmentGroupNumber << diff --git a/utils.h b/utils.h index 5db372e7..15c4f092 100644 --- a/utils.h +++ b/utils.h @@ -150,7 +150,29 @@ class ThreadGroupTempAffinity }; #endif +class checked_uint64 // uint64 with checking for overflows when computing differences +{ + uint64 data; + uint64 overflows; +public: + checked_uint64() : data(0), overflows(0) {} + checked_uint64(const uint64 d, const uint64 o) : data(d), overflows(o) {} + const checked_uint64& operator += (const checked_uint64& o) + { + data += o.data; + overflows += o.overflows; + return *this; + } + + uint64 operator - (const checked_uint64& o) const + { + // computing data - o.data + constexpr uint64 counter_width = 48; + return data + overflows * (1ULL << counter_width) - o.data; + } + uint64 getRawData_NoOverflowProtection() const { return data; } +}; // a secure (but partial) alternative for sscanf // see example usage in pcm-core.cpp @@ -296,6 +318,12 @@ class MainLoop } }; +#ifdef __linux__ +FILE * tryOpen(const char * path, const char * mode); +std::string readSysFS(const char * path, bool silent); +bool writeSysFS(const char * path, const std::string & value, bool silent); +#endif + int calibratedSleep(const double delay, const char* sysCmd, const MainLoop& mainLoop, PCM* m); struct StackedBarItem {