From 7df79886dda64b97baf74feb2be4ea3a5f877eb7 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Fri, 5 Jun 2026 07:19:51 +0800 Subject: [PATCH] fix: log block eigenvalue summary events Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com> --- deepspeed/runtime/engine.py | 23 ++++++++++++++--------- tests/unit/runtime/test_engine.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 tests/unit/runtime/test_engine.py diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py index 82a7592f14cb..f76dbebdf487 100755 --- a/deepspeed/runtime/engine.py +++ b/deepspeed/runtime/engine.py @@ -2814,6 +2814,19 @@ def zero_grad(self): for param_name, param in self.module.named_parameters(): param.grad = None + def _eigenvalue_summary_events(self): + if not (self.eigenvalue_enabled() and not self.gas_boundary_ctr % self.eigenvalue_gas_boundary_resolution()): + return [] + + events = [] + for i, ev_value in enumerate(self.block_eigenvalue.values()): + events.append(( + f"Train/Eigenvalues/ModelBlockParam_{i}", + ev_value[0], + self.global_samples, + )) + return events + def clip_fp32_gradients(self): clip_grad_norm_(parameters=self.module.parameters(), max_norm=self.gradient_clipping(), mpu=self.mpu) @@ -2963,15 +2976,7 @@ def step(self, lr_kwargs=None): self.global_samples, )) - if (self.eigenvalue_enabled() - and not self.gas_boundary_ctr % self.eigenvalue_gas_boundary_resolution()): - ev_values = self.block_eigenvalue.values() - for i in range(len(ev_values)): - self.summary_events.append(( - f"Train/Eigenvalues/ModelBlockParam_{i}", - self.ev_values[i][0], - self.global_samples, - )) + self.summary_events.extend(self._eigenvalue_summary_events()) self.monitor.write_events(self.summary_events) # Check flops profiling diff --git a/tests/unit/runtime/test_engine.py b/tests/unit/runtime/test_engine.py new file mode 100644 index 000000000000..88940e73cc3b --- /dev/null +++ b/tests/unit/runtime/test_engine.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +from deepspeed.runtime.engine import DeepSpeedEngine + + +def test_eigenvalue_summary_events_use_block_eigenvalue_values(): + engine = object.__new__(DeepSpeedEngine) + engine.block_eigenvalue = { + "block_a": (0.25, 0), + "block_b": (0.5, 1), + } + engine.gas_boundary_ctr = 4 + engine.global_samples = 128 + engine.eigenvalue_enabled = lambda: True + engine.eigenvalue_gas_boundary_resolution = lambda: 2 + + assert engine._eigenvalue_summary_events() == [ + ("Train/Eigenvalues/ModelBlockParam_0", 0.25, 128), + ("Train/Eigenvalues/ModelBlockParam_1", 0.5, 128), + ] + + +def test_eigenvalue_summary_events_skip_non_boundary_steps(): + engine = object.__new__(DeepSpeedEngine) + engine.block_eigenvalue = {"block_a": (0.25, 0)} + engine.gas_boundary_ctr = 3 + engine.eigenvalue_enabled = lambda: True + engine.eigenvalue_gas_boundary_resolution = lambda: 2 + + assert engine._eigenvalue_summary_events() == []