// timeline.h (forked from horovod/horovod)
// Copyright 2018 Uber Technologies, Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef HOROVOD_TIMELINE_H
#define HOROVOD_TIMELINE_H
#include <chrono>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include <mutex>
#include "common.h"
#include "mpi_message.h"
namespace horovod {
namespace common {
// How frequently the Horovod Timeline should be flushed to disk: an interval
// of one second, expressed as a std::chrono::seconds value (requires <chrono>).
// This is a preprocessor macro, so despite being written inside the namespace
// it is not scoped to it, and every use expands to a fresh
// std::chrono::seconds(1) expression.
#define TIMELINE_FLUSH_TIME std::chrono::seconds(1)
// State of a single tensor as tracked by the timeline (see the per-tensor
// state map kept by Timeline). This is an unscoped enum: the enumerators are
// visible in the enclosing namespace and convert implicitly to int. The values
// are spelled out explicitly and match the implicit 0..3 numbering.
// NOTE(review): the meaning of each state is inferred from its name and from
// the Negotiate*/Start/Activity* methods on Timeline -- confirm against the
// implementation file.
enum TimelineState {
  UNKNOWN = 0,      // presumably: no event recorded yet for the tensor
  NEGOTIATING = 1,  // presumably: between NegotiateStart and NegotiateEnd
  TOP_LEVEL = 2,    // presumably: operation started, no activity in progress
  ACTIVITY = 3      // presumably: between ActivityStart and ActivityEnd
};
// Writes a timeline of per-tensor events in Chrome Tracing format. The
// Timeline spec is from:
// https://github.com/catapult-project/catapult/tree/master/tracing
//
// Only declarations live in this header; the member functions are defined
// elsewhere, so the per-method notes below describe the declared interface and
// hedge anything that depends on the implementation.
//
// NOTE(review): this header uses std::string and std::shared_ptr but does not
// include <string> or <memory> directly; they presumably arrive through one of
// the other includes -- confirm.
class Timeline {
public:
// Sets up the timeline to record into `file_name` (taken by value).
// NOTE(review): presumably opens file_ and sets initialized_ -- confirm in the
// implementation file.
void Initialize(std::string file_name);
// Reports whether the timeline has been initialized. Const: does not modify
// the object. NOTE(review): presumably returns initialized_ -- confirm.
bool Initialized() const;
// Negotiation-phase events for the tensor named `tensor_name`.
// NegotiateStart takes the kind of request being negotiated,
// NegotiateRankReady takes the rank being reported, and NegotiateEnd takes
// only the tensor name.
// NOTE(review): presumably these bracket the NEGOTIATING state of
// TimelineState -- confirm.
void NegotiateStart(const std::string& tensor_name,
const MPIRequest::RequestType request_type);
void NegotiateRankReady(const std::string& tensor_name, const int rank);
void NegotiateEnd(const std::string& tensor_name);
// Marks the start of the operation on `tensor_name`, given the type of the
// response that triggered it.
// NOTE(review): presumably moves the tensor to TOP_LEVEL -- confirm.
void Start(const std::string& tensor_name,
const MPIResponse::ResponseType response_type);
// Marks the start / end of a named activity for `tensor_name`. ActivityEnd
// takes no activity name.
// NOTE(review): presumably it closes the activity most recently started for
// that tensor (ACTIVITY state) -- confirm.
void ActivityStart(const std::string& tensor_name,
const std::string& activity);
void ActivityEnd(const std::string& tensor_name);
// Marks the end of the operation on `tensor_name`. `tensor` is passed as a
// shared_ptr by value.
// NOTE(review): how the tensor is used (e.g. to record metadata), and whether
// it may be null, is not visible here -- confirm.
void End(const std::string& tensor_name, const std::shared_ptr<Tensor> tensor);
private:
// Emits one event for `tensor_name`. `op_name` and `args` are optional and
// default to the empty string.
// NOTE(review): `phase` is presumably the single-character event phase code
// from the Chrome Tracing format -- confirm.
void WriteEvent(const std::string& tensor_name, const char phase,
const std::string& op_name = "",
const std::string& args = "");
// Boolean flag indicating whether Timeline was initialized (and thus should
// be recorded). Defaults to false.
bool initialized_ = false;
// Time point when Horovod was started. Uses the monotonic steady_clock.
std::chrono::steady_clock::time_point start_time_;
// Last time stream was flushed (compare with TIMELINE_FLUSH_TIME).
std::chrono::steady_clock::time_point last_flush_time_;
// Timeline file.
std::ofstream file_;
// A mutex that guards timeline state from concurrent access. It is a
// recursive mutex, so the owning thread may lock it again without
// deadlocking.
std::recursive_mutex mutex_;
// Mapping of tensor names to indexes. It is used to reduce size of the
// timeline file.
std::unordered_map<std::string, int> tensor_table_;
// Current state of each tensor in the timeline, keyed by tensor name.
std::unordered_map<std::string, TimelineState> tensor_states_;
};
} // namespace common
} // namespace horovod
#endif // HOROVOD_TIMELINE_H