diff --git a/smdebug/core/locations.py b/smdebug/core/locations.py index 671951e69..ecb55642c 100644 --- a/smdebug/core/locations.py +++ b/smdebug/core/locations.py @@ -6,6 +6,7 @@ # First Party from smdebug.profiler.profiler_constants import ( + CONVERT_TO_MICROSECS, DEFAULT_PREFIX, PYTHONTIMELINE_SUFFIX, TRACE_DIRECTORY_FORMAT, @@ -120,7 +121,9 @@ class TraceFileLocation: @staticmethod def get_file_location(timestamp, base_dir): env_base_location = base_dir - date_hour = time.strftime(TRACE_DIRECTORY_FORMAT, time.gmtime(timestamp)) + date_hour = time.strftime( + TRACE_DIRECTORY_FORMAT, time.gmtime(timestamp / CONVERT_TO_MICROSECS) + ) timestamp = int(round(timestamp)) worker_id = get_node_id() file_path = os.path.join( diff --git a/smdebug/core/tfevent/timeline_file_writer.py b/smdebug/core/tfevent/timeline_file_writer.py index e79d22e92..f212fef36 100644 --- a/smdebug/core/tfevent/timeline_file_writer.py +++ b/smdebug/core/tfevent/timeline_file_writer.py @@ -180,9 +180,11 @@ def __init__( self.tensor_table = collections.defaultdict(int) self.continuous_fail_count = 0 self.is_first = True - self.last_event_end_time = int(round(base_start_time / CONVERT_TO_MICROSECS)) + self.last_event_end_time = int(round(base_start_time)) self.last_file_close_time = self.last_event_end_time - self.cur_hour = datetime.utcfromtimestamp(self.last_file_close_time).hour + self.cur_hour = datetime.utcfromtimestamp( + self.last_file_close_time / CONVERT_TO_MICROSECS + ).hour self._healthy = True self._profiler_config_parser = profiler_config_parser self.node_id = get_node_id() @@ -192,7 +194,7 @@ def run(self): # if there is long interval between 2 events, just keep checking if # the file is still open. if it is open for too long and a new event # has not occurred, close the open file based on rotation policy. - if self._writer and self._should_rotate_now(time.time()): + if self._writer and self._should_rotate_now(time.time() * CONVERT_TO_MICROSECS): self.close() event = self._queue.get() @@ -227,14 +229,15 @@ def open(self, path, cur_event_end_time): self.is_first = True self._writer.write("[\n") self._healthy = True - self.cur_hour = datetime.utcfromtimestamp(cur_event_end_time).hour + self.cur_hour = datetime.utcfromtimestamp(cur_event_end_time / CONVERT_TO_MICROSECS).hour return True def _get_rotation_info(self, now): file_size = self.file_size() + now = now / CONVERT_TO_MICROSECS # find the difference between the now and last file closed time (in seconds) - diff_in_seconds = int(round(now - self.last_file_close_time)) + diff_in_seconds = int(round(now - (self.last_file_close_time / CONVERT_TO_MICROSECS))) now_datehour = datetime.utcfromtimestamp(now) @@ -271,9 +274,7 @@ def write_event(self, record): Close file if file size exceeds $ENV_MAX_FILE_SIZE or folder was created more than $ENV_CLOSE_FILE_INTERVAL time duration. """ - end_time_for_event = ( - record.event_end_ts_micros / CONVERT_TO_MICROSECS - ) # convert back to secs + end_time_for_event = record.event_end_ts_micros # check if any of the rotation policies have been satisfied. close the existing # trace file and open a new one @@ -329,7 +330,7 @@ def write_event(self, record): # write the trace event record position_and_length_of_record = self._writer.write(record.to_json() + ",\n") self.flush() - self.last_event_end_time = int(round(record.event_end_ts_micros / CONVERT_TO_MICROSECS)) + self.last_event_end_time = record.event_end_ts_micros return position_and_length_of_record def flush(self): diff --git a/tests/core/test_timeline_writer.py b/tests/core/test_timeline_writer.py index 9f970132e..c31263fa5 100644 --- a/tests/core/test_timeline_writer.py +++ b/tests/core/test_timeline_writer.py @@ -4,6 +4,7 @@ import multiprocessing as mp import os import time +from datetime import datetime from pathlib import Path # Third Party @@ -13,7 +14,11 @@ # First Party from smdebug.core.tfevent.timeline_file_writer import TimelineFileWriter from smdebug.profiler.profiler_config_parser import ProfilerConfigParser -from smdebug.profiler.profiler_constants import CONVERT_TO_MICROSECS, DEFAULT_PREFIX +from smdebug.profiler.profiler_constants import ( + CONVERT_TO_MICROSECS, + DEFAULT_PREFIX, + TRACE_DIRECTORY_FORMAT, +) @pytest.fixture() @@ -58,6 +63,15 @@ def test_create_timeline_file(simple_profiler_config_parser, out_dir): assert len(files) == 1 + file_ts = files[0].name.split("_")[0] + folder_name = files[0].parent.name + assert folder_name == time.strftime( + TRACE_DIRECTORY_FORMAT, time.gmtime(int(file_ts) / CONVERT_TO_MICROSECS) + ) + assert folder_name == datetime.strptime(folder_name, TRACE_DIRECTORY_FORMAT).strftime( + TRACE_DIRECTORY_FORMAT + ) + with open(files[0]) as timeline_file: events_dict = json.load(timeline_file)