cholla-hydro · evaneschneider · Sep 22, 2023 · Sep 20, 2023 · Sep 20, 2023 · Sep 21, 2023
diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp
@@ -469,7 +469,7 @@ Real Grid3D::Update_Grid(void)
   }
 
 #ifdef CPU_TIME
-  Timer.Hydro_Integrator.End();
+  Timer.Hydro_Integrator.End(true);
 #endif  // CPU_TIME
 
 #ifdef CUDA

diff --git a/src/main.cpp b/src/main.cpp
@@ -354,7 +354,9 @@ int main(int argc, char *argv[])
 #ifdef N_STEPS_LIMIT
     // Exit the loop when reached the limit number of steps (optional)
     if (G.H.n_step == N_STEPS_LIMIT) {
+  #ifdef OUTPUT
       WriteData(G, P, nfile);
+  #endif  // OUTPUT
       break;
     }
 #endif

diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp
@@ -67,6 +67,7 @@ static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE;
   #define cudaPointerGetAttributes           hipPointerGetAttributes
   #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
   #define cudaMemGetInfo                     hipMemGetInfo
+  #define cudaDeviceGetPCIBusId              hipDeviceGetPCIBusId
 
   // Texture definitions
   #define cudaArray           hipArray

diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp
@@ -1,11 +1,13 @@
 #include "../utils/timing_functions.h"
 #ifdef CPU_TIME
 
+  #include <algorithm>
   #include <fstream>
   #include <iostream>
   #include <string>
 
   #include "../global/global.h"
+  #include "../global/global_cuda.h"
   #include "../io/io.h"
 
   #ifdef MPI_CHOLLA
@@ -28,7 +30,7 @@ void OneTime::Subtract(Real time_to_subtract)
   time_start += time_to_subtract;
 }
 
-void OneTime::End()
+void OneTime::End(bool const print_high_values)
 {
   cudaDeviceSynchronize();
   if (inactive) {
@@ -50,6 +52,32 @@ void OneTime::End()
     t_all += t_max;
   }
   n_steps++;
+
+  #ifdef MPI_CHOLLA
+  // Print out information if the process is unusually slow
+  if ((time >= 1.1 * t_avg) and (n_steps > 0) and print_high_values) {
+    // Get node ID
+    std::string node_id(MPI_MAX_PROCESSOR_NAME, ' ');
+    int length;
+    MPI_Get_processor_name(node_id.data(), &length);
+    node_id.resize(length);
+
+    // Get GPU ID. The buffer is over-allocated and space-padded; the runtime fills it with a NUL-terminated string
+    std::string gpu_id(MPI_MAX_PROCESSOR_NAME, ' ');
+    int device;
+    CudaSafeCall(cudaGetDevice(&device));
+    CudaSafeCall(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device));
+    // Truncate at the terminator: trimming trailing whitespace alone stops at the '\0' and leaves it in the
+    // string, so it would be written to the log along with the ID
+    gpu_id.erase(std::find(gpu_id.begin(), gpu_id.end(), '\0'), gpu_id.end());
+
+    std::cerr << "WARNING: Rank took longer than expected to execute." << std::endl
+              << "         Node Time: " << time << std::endl
+              << "         Avg Time: " << t_avg << std::endl
+              << "         Node ID: " << node_id << std::endl
+              << "         GPU PCI Bus ID: " << gpu_id << std::endl;
+  }
+  #endif  // MPI_CHOLLA
 }
 
 void OneTime::RecordTime(Real time)

diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h
@@ -28,7 +28,7 @@ class OneTime
   }
   void Start();
   void Subtract(Real time_to_subtract);
-  void End();
+  void End(bool const print_high_values = false);
   void PrintStep();
   void PrintAverage();
   void PrintAll();