Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,25 @@ If `recording.sampling.mode: adaptive` is enabled in `.tusk/config.yaml`, this e

For more details on sampling rate configuration methods and precedence, see the [Initialization Guide](./initialization.md#configure-sampling-rate).

## TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS

Controls whether adaptive sampling emits transition logs like `Adaptive sampling updated (...)`.

- **Type:** Boolean (`true`/`false`, `1`/`0`, `yes`/`no`, `on`/`off`)
- **If unset:** Falls back to `recording.sampling.log_transitions` in `.tusk/config.yaml`, then defaults to `True`
- **Precedence:** Overrides `recording.sampling.log_transitions`
- **Scope:** Only affects adaptive sampling transition logs. It does not change recording decisions or the global SDK log level

**Examples:**

```bash
# Keep adaptive sampling active but silence transition logs
TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS=false python app.py

# Explicitly re-enable transition logs
TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS=true python app.py
```

## Rust Core Flags

These variables control optional Rust-accelerated paths in the SDK.
Expand Down
19 changes: 19 additions & 0 deletions docs/initialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,19 @@ recording:
sampling_rate: 0.1
```

### Adaptive Sampling Logs

When adaptive mode changes state or multiplier, the SDK logs an `Adaptive sampling updated (...)` line at `info` level.

- `state`: controller state such as `healthy`, `warm`, `hot`, or `critical_pause`
- `multiplier`: factor applied to `base_rate`
- `effective_rate`: current root-request recording rate after shedding
- `pressure`: highest normalized pressure signal (`0..1`) driving the update
- `queue_fill`: smoothed export-queue usage ratio; values near `1.0` mean the exporter is falling behind
- `memory_pressure_ratio`: current memory usage relative to its detected limit, when available

Set `recording.sampling.log_transitions: false` in `.tusk/config.yaml`, or set `TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS=false`, if you want to suppress these transition logs without changing the overall SDK log level. Raising `log_level="warn"` or higher will also hide them.

### Recording Configuration Options

<table>
Expand Down Expand Up @@ -287,6 +300,12 @@ recording:
<td><code>0.001</code> in <code>adaptive</code> mode</td>
<td>The minimum steady-state sampling rate for adaptive mode. In critical conditions the SDK can still temporarily pause recording.</td>
</tr>
<tr>
<td><code>sampling.log_transitions</code></td>
<td><code>bool</code></td>
<td><code>True</code></td>
<td>Controls whether adaptive sampling emits <code>Adaptive sampling updated (...)</code> transition logs. Can be overridden by <code>TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS</code>.</td>
</tr>
<tr>
<td><code>sampling_rate</code></td>
<td><code>float</code></td>
Expand Down
5 changes: 5 additions & 0 deletions drift/core/adaptive_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ def __init__(
self,
config: ResolvedSamplingConfig,
*,
log_transitions: bool = True,
random_fn=random.random,
now_fn=time.monotonic,
) -> None:
self._config = config
self._log_transitions = log_transitions
self._random_fn = random_fn
self._now_fn = now_fn
self._lock = threading.RLock()
Expand Down Expand Up @@ -239,6 +241,9 @@ def _log_transition(
pressure: float,
snapshot: AdaptiveSamplingHealthSnapshot,
) -> None:
if not self._log_transitions:
return

if previous_state == self._state and abs(previous_multiplier - self._admission_multiplier) < 0.05:
return

Expand Down
10 changes: 10 additions & 0 deletions drift/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class SamplingConfig:
mode: str | None = None
base_rate: float | None = None
min_rate: float | None = None
log_transitions: bool | None = None


@dataclass
Expand Down Expand Up @@ -181,10 +182,19 @@ def _parse_recording_config(data: dict[str, Any]) -> RecordingConfig:
)
mode = None

log_transitions = raw_sampling.get("log_transitions")
if log_transitions is not None and not isinstance(log_transitions, bool):
logger.warning(
"Invalid 'sampling.log_transitions' in config: expected boolean, got "
f"{type(log_transitions).__name__}. This value will be ignored."
)
log_transitions = None

sampling = SamplingConfig(
mode=mode,
base_rate=float(base_rate) if base_rate is not None else None,
min_rate=float(min_rate) if min_rate is not None else None,
log_transitions=log_transitions,
)

return RecordingConfig(
Expand Down
36 changes: 34 additions & 2 deletions drift/core/drift_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self) -> None:
self._sampling_rate: float = 1.0
self._sampling_mode: str = "fixed"
self._min_sampling_rate: float = 0.0
self._sampling_log_transitions: bool = True
self._adaptive_sampling_controller: AdaptiveSamplingController | None = None
self._adaptive_sampling_thread: threading.Thread | None = None
self._adaptive_sampling_stop_event = threading.Event()
Expand Down Expand Up @@ -135,7 +136,7 @@ def _log_startup_summary(self, env: str, use_remote_export: bool) -> None:
)

logger.info(
"SDK initialized successfully (version=%s, mode=%s, env=%s, service=%s, serviceId=%s, exportSpans=%s, samplingMode=%s, samplingBaseRate=%s, samplingMinRate=%s, logLevel=%s, runtime=python %s, platform=%s/%s).",
"SDK initialized successfully (version=%s, mode=%s, env=%s, service=%s, serviceId=%s, exportSpans=%s, samplingMode=%s, samplingBaseRate=%s, samplingMinRate=%s, samplingLogTransitions=%s, logLevel=%s, runtime=python %s, platform=%s/%s).",
SDK_VERSION,
self.mode,
env,
Expand All @@ -145,6 +146,7 @@ def _log_startup_summary(self, env: str, use_remote_export: bool) -> None:
self._sampling_mode,
self._sampling_rate,
self._min_sampling_rate,
self._sampling_log_transitions,
get_log_level(),
platform.python_version(),
platform.system().lower(),
Expand Down Expand Up @@ -201,6 +203,7 @@ def initialize(
instance._sampling_rate = sampling_config.base_rate
instance._sampling_mode = sampling_config.mode
instance._min_sampling_rate = sampling_config.min_rate
instance._sampling_log_transitions = instance._determine_sampling_log_transitions()

# Start coverage collection after the _initialized guard so repeated
# initialize() calls don't stop/restart coverage and lose accumulated data.
Expand Down Expand Up @@ -408,6 +411,34 @@ def _determine_sampling_rate(self, init_param: float | None) -> float:
"""Backward-compatible helper that returns only the effective base sampling rate."""
return self._determine_sampling_config(init_param).base_rate

def _determine_sampling_log_transitions(self) -> bool:
"""Determine whether adaptive sampling transitions should be logged."""
recording_config = self.file_config.recording if self.file_config else None
config_sampling = recording_config.sampling if recording_config else None

env_value = os.environ.get("TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS")
if env_value is not None:
normalized = env_value.strip().lower()
if normalized in {"1", "true", "yes", "on"}:
logger.debug(
"Using adaptive sampling log_transitions from TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS env var: True"
)
return True
if normalized in {"0", "false", "no", "off"}:
logger.debug(
"Using adaptive sampling log_transitions from TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS env var: False"
)
return False
logger.warning(
"Invalid TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS env var: %s. Expected one of true/false/1/0/yes/no/on/off.",
env_value,
)

if config_sampling and config_sampling.log_transitions is not None:
return config_sampling.log_transitions

return True

def _start_adaptive_sampling_control_loop(self) -> None:
if self.mode != TuskDriftMode.RECORD or self._sampling_mode != "adaptive":
return
Expand All @@ -417,7 +448,8 @@ def _start_adaptive_sampling_control_loop(self) -> None:
mode="adaptive",
base_rate=self._sampling_rate,
min_rate=self._min_sampling_rate,
)
),
log_transitions=self._sampling_log_transitions,
)
self._effective_memory_limit_bytes = self._detect_effective_memory_limit_bytes()
self._adaptive_sampling_stop_event.clear()
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test_adaptive_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,19 @@ def worker() -> None:
thread.join(timeout=1.0)
assert not thread.is_alive()
assert controller.get_decision(is_pre_app_start=False).state == "hot"


def test_controller_can_suppress_transition_logs(caplog):
now = {"value": 0.0}
controller = AdaptiveSamplingController(
ResolvedSamplingConfig(mode="adaptive", base_rate=0.5, min_rate=0.1),
log_transitions=False,
now_fn=lambda: now["value"],
)

with caplog.at_level("INFO"):
controller.update(AdaptiveSamplingHealthSnapshot(queue_fill_ratio=0.9))
now["value"] = 1.0
controller.update(AdaptiveSamplingHealthSnapshot(queue_fill_ratio=0.1))

assert "Adaptive sampling updated" not in caplog.text
2 changes: 2 additions & 0 deletions tests/unit/test_config_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def test_loads_nested_sampling_config(self):
mode: adaptive
base_rate: 0.25
min_rate: 0.05
log_transitions: false
"""
)

Expand All @@ -243,6 +244,7 @@ def test_loads_nested_sampling_config(self):
assert config.recording.sampling.mode == "adaptive"
assert config.recording.sampling.base_rate == 0.25
assert config.recording.sampling.min_rate == 0.05
assert config.recording.sampling.log_transitions is False
finally:
os.chdir(original_cwd)

Expand Down
37 changes: 36 additions & 1 deletion tests/unit/test_drift_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def reset_singleton(self):
"TUSK_DRIFT_MODE",
"TUSK_API_KEY",
"TUSK_RECORDING_SAMPLING_RATE",
"TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS",
"TUSK_SAMPLING_RATE",
"ENV",
]
Expand Down Expand Up @@ -129,7 +130,11 @@ def reset_singleton(self):
TuskDrift._instance = None
TuskDrift._initialized = False
# Clear sampling rate env vars
for env_var in ("TUSK_RECORDING_SAMPLING_RATE", "TUSK_SAMPLING_RATE"):
for env_var in (
"TUSK_RECORDING_SAMPLING_RATE",
"TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS",
"TUSK_SAMPLING_RATE",
):
if env_var in os.environ:
del os.environ[env_var]
yield
Expand Down Expand Up @@ -255,6 +260,35 @@ def test_invalid_recording_env_var_falls_back_to_legacy_alias(self, reset_single

assert result == 0.4

def test_uses_config_sampling_log_transitions_when_env_var_unset(self, reset_singleton):
"""Should use config file log_transitions when env var is not set."""
os.environ["TUSK_DRIFT_MODE"] = "DISABLED"
instance = TuskDrift.get_instance()
instance.file_config = TuskFileConfig(
recording=RecordingConfig(
sampling=SamplingConfig(log_transitions=False),
)
)

result = instance._determine_sampling_log_transitions()

assert result is False

def test_recording_log_transitions_env_var_overrides_config(self, reset_singleton):
"""Should prefer the env var over config file log_transitions."""
os.environ["TUSK_DRIFT_MODE"] = "DISABLED"
os.environ["TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS"] = "false"
instance = TuskDrift.get_instance()
instance.file_config = TuskFileConfig(
recording=RecordingConfig(
sampling=SamplingConfig(log_transitions=True),
)
)

result = instance._determine_sampling_log_transitions()

assert result is False


class TestTuskDriftInitialize:
"""Tests for TuskDrift.initialize method."""
Expand All @@ -269,6 +303,7 @@ def reset_singleton(self):
"TUSK_DRIFT_MODE",
"TUSK_API_KEY",
"TUSK_RECORDING_SAMPLING_RATE",
"TUSK_RECORDING_SAMPLING_LOG_TRANSITIONS",
"TUSK_SAMPLING_RATE",
"ENV",
]
Expand Down
Loading