diff --git a/codeguru_profiler_agent/model/profile.py b/codeguru_profiler_agent/model/profile.py index 82f5cb3..929defc 100644 --- a/codeguru_profiler_agent/model/profile.py +++ b/codeguru_profiler_agent/model/profile.py @@ -1,4 +1,5 @@ import time +import logging from random import SystemRandom from codeguru_profiler_agent.model.call_graph_node import CallGraphNode @@ -9,6 +10,7 @@ ROOT_NODE_NAME = "ALL" +logger = logging.getLogger(__name__) class Profile: def __init__(self, profiling_group_name, sampling_interval_seconds, host_weight, start, agent_debug_info, clock=time.time): @@ -56,11 +58,40 @@ def get_active_millis_since_start(self): This returns the total "active" wall clock time since start. In AWS lambda the process can be frozen, the time while we are frozen should not be counted in here. In an EC2 or other type of host it is simply the wall clock time since start. - If the "end" time has been set, we give the active time between start and end otherwise between start and now + + Previously self.end was used to calculate active_millis_since_start but self.end is updated when a sample is added + so in rare cases where a sample is collected before the last pause time then we might add additional pause time + which can lead to incorrect calculation of active time. In worst cases, it can even lead to negative values depending + on the pause time. + + Below is an example indicating the potential error of considering self.end in the calculation. + ------------------------------------------------------------------ + | | | | | | | | |||||| + S P R P R SE P R REPORT + s1_________|___p1___|____|____p2___|____e1____|___p3__|__________| + + S - Start + P - Pause + R - Resume + SE - self.end (last sample) + REPORT - Calculating active time. + + If we consider self.end which is e1 in above case then active time would be e1-s1-(p1+p2+p3). But pause p3 is after e1 + so that leads to incorrect calculation of active time. + + Ideally we would want to set profile_end to be last sample time and subtract pause times only before that but it requires + additional work in maintaining pause time which isn't worth as it makes the logic complex with very little gain. + + So we are setting it to current time and in some corner cases to last_pause time. """ - end = self.last_pause if self.last_pause is not None else \ - self._end if self._end is not None else current_milli_time(clock=self._clock) - return end - self.start - self._paused_ms + end = self.last_pause if self.last_pause is not None else current_milli_time(clock=self._clock) + active_time_millis_since_start = end - self.start - self._paused_ms + logger.debug( + "Active time since start is {activeTime} which is calculated using start: {start}, end: {end}, last_pause: {last_pause}, paused_ms: {paused_ms}, last_resume: {last_resume}" + .format(activeTime = active_time_millis_since_start, start = self.start, end = self._end, last_pause = self.last_pause, paused_ms = self._paused_ms, last_resume = self.last_resume) + ) + + return active_time_millis_since_start def add(self, sample): """ @@ -113,8 +144,9 @@ def resume(self): # resume gets called when profile is running return self.last_resume = current_milli_time(clock=self._clock) - self._paused_ms += self.last_resume - self.last_pause + prev_last_pause = self.last_pause self.last_pause = None + self._paused_ms += self.last_resume - prev_last_pause def is_empty(self): return self.total_seen_threads_count == 0.0 diff --git a/test/unit/model/test_profile.py b/test/unit/model/test_profile.py index 07c0537..895dd48 100644 --- a/test/unit/model/test_profile.py +++ b/test/unit/model/test_profile.py @@ -420,10 +420,10 @@ def before(self): self.subject.resume() self.turn_clock(seconds=5) self.subject.end = self.mock_clock() * 1000 - self.turn_clock(seconds=1) # that extra second should not be included in the duration + self.turn_clock(seconds=5) def test_it_returns_duration_up_to_end_time(self): - assert self.subject.get_active_millis_since_start() == 14000 + assert self.subject.get_active_millis_since_start() == 19000 class TestSetOverhead(TestProfile): diff --git a/test/unit/sdk_reporter/test_sdk_profile_encoder.py b/test/unit/sdk_reporter/test_sdk_profile_encoder.py index 2332a63..cf6df1b 100644 --- a/test/unit/sdk_reporter/test_sdk_profile_encoder.py +++ b/test/unit/sdk_reporter/test_sdk_profile_encoder.py @@ -23,9 +23,9 @@ def example_profile(): start_time = 1514764800000 - end_time = 1514772000000 + end_time = 1514771000000 profile = Profile(profiling_group_name="TestProfilingGroupName", sampling_interval_seconds=1.0, host_weight=2, - start=start_time, agent_debug_info=AgentDebugInfo(errors_metadata), clock=lambda: 1514772000000) + start=start_time, agent_debug_info=AgentDebugInfo(errors_metadata), clock=lambda: 1514772000) profile.add( Sample(stacks=[[Frame("bottom"), Frame("middle"), Frame("top")], [Frame("bottom"), Frame("middle"), Frame("different_top")], @@ -89,7 +89,7 @@ def test_it_includes_the_start_time_from_the_profile_in_epoch_millis( def test_it_includes_the_end_time_from_the_profile_in_epoch_millis( self): - assert (self.decoded_json_result()["end"] == 1514772000000) + assert (self.decoded_json_result()["end"] == 1514771000000) def test_it_includes_the_agent_info_in_the_agent_metadata(self): assert (self.decoded_json_result()["agentMetadata"]["agentInfo"]["type"] ==