forked from root-project/root
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TProofNodes.cxx
284 lines (257 loc) · 9.29 KB
/
TProofNodes.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
// @(#)root/proof:$Id$
// Author:
/*************************************************************************
* Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
//////////////////////////////////////////////////////////////////////////
// //
// TProofNodes //
// //
// PROOF worker node information //
// //
//////////////////////////////////////////////////////////////////////////
#include "TProofNodes.h"
#include "TProof.h"
#include "TList.h"
#include "TMap.h"
#include "TObjString.h"
// ROOT class-implementation macro for TProofNodes.
// NOTE(review): presumably hooks the class into ROOT's dictionary/RTTI
// machinery (see Rtypes.h) - confirm against the ROOT version in use.
ClassImp(TProofNodes)
////////////////////////////////////////////////////////////////////////////////
/// Constructor.
/// Records the PROOF session, starts with empty maps and zeroed counters
/// (the min/max workers-per-node values start at -1, which Build() treats as
/// "not yet set"), and immediately calls Build() to populate everything.
/// Input: proof: the PROOF session to describe; Build() only warns and
///        returns if it is null or not valid.
TProofNodes::TProofNodes(TProof* proof)
   : fProof(proof), fNodes(nullptr), fActiveNodes(nullptr),
     fMaxWrksNode(-1), fMinWrksNode(-1),
     fNNodes(0), fNWrks(0), fNActiveWrks(0), fNCores(0)
{
   Build();
}
////////////////////////////////////////////////////////////////////////////////
/// Destructor.
/// Releases both maps allocated by Build(): the full node map (fNodes) and
/// the active-node map (fActiveNodes). Each map is flagged as owner before
/// deletion, mirroring what Build() does when it discards an old map.
TProofNodes::~TProofNodes()
{
   if (fNodes) {
      fNodes->SetOwner(kTRUE);
      SafeDelete(fNodes);
   }
   // fActiveNodes is created in Build() next to fNodes and must be released
   // here as well, otherwise it leaks when the object is destroyed.
   if (fActiveNodes) {
      fActiveNodes->SetOwner(kTRUE);
      SafeDelete(fActiveNodes);
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Desctiption: Build the node list, which is a list of nodes whose members
/// in turn are lists of workers on the node.
/// Input: Nothing
/// Return: Nothing
void TProofNodes::Build()
{
if (!fProof || !fProof->IsValid()) {
Warning("Build", "the PROOF instance is undefined or invalid! Cannot continue");
return;
}
if (fNodes){
fNodes->SetOwner(kTRUE);
SafeDelete(fNodes);
}
fNodes = new TMap;
fNodes->SetOwner(kTRUE);
TList *slaves = fProof->GetListOfSlaveInfos();
TIter nxtslave(slaves);
TSlaveInfo *si = 0;
TList *node = 0;
TPair *pair = 0;
while ((si = (TSlaveInfo *)(nxtslave()))) {
TSlaveInfo *si_copy = (TSlaveInfo *)(si->Clone());
if (!(pair = (TPair *) fNodes->FindObject(si->GetName()))) {
node = new TList;
//si's are owned by the member fSlaveInfo of fProof
node->SetOwner(kTRUE);
node->SetName(si_copy->GetName());
node->Add(si_copy);
fNodes->Add(new TObjString(si->GetName()), node);
} else {
node = (TList *) pair->Value();
node->Add(si_copy);
}
}
// Update counters and created active nodes map
if (fActiveNodes){
fActiveNodes->SetOwner(kTRUE);
SafeDelete(fActiveNodes);
}
fActiveNodes = new TMap;
fActiveNodes->SetOwner(kTRUE);
TList *actnode = 0;
fMaxWrksNode = -1;
fMinWrksNode = -1;
fNNodes = 0;
fNWrks = 0;
fNActiveWrks = 0;
TIter nxk(fNodes);
TObject *key = 0;
while ((key = nxk()) != 0) {
node = dynamic_cast<TList *>(fNodes->GetValue(key));
if (node) {
fNNodes++;
// Number of cores
si = (TSlaveInfo *) node->First();
fNCores += si->fSysInfo.fCpus;
// Number of workers
fNWrks += node->GetSize();
if (fMinWrksNode == -1 || (node->GetSize() < fMinWrksNode)) {
fMinWrksNode = node->GetSize();
}
if (fMaxWrksNode == -1 || (node->GetSize() > fMaxWrksNode)) {
fMaxWrksNode = node->GetSize();
}
TIter nxw(node);
while ((si = (TSlaveInfo *) nxw())) {
if (si->fStatus == TSlaveInfo::kActive) {
fNActiveWrks++;
TSlaveInfo *si_copy = (TSlaveInfo *)(si->Clone());
actnode = dynamic_cast<TList *>(fActiveNodes->GetValue(key));
if (actnode) {
actnode->Add(si_copy);
} else {
actnode = new TList;
actnode->SetOwner(kTRUE);
actnode->SetName(si_copy->GetName());
actnode->Add(si_copy);
fActiveNodes->Add(new TObjString(si->GetName()), actnode);
}
}
}
} else {
Warning("Build", "could not get list for node '%s'", key->GetName());
}
}
// Done
return;
}
////////////////////////////////////////////////////////////////////////////////
/// Description: Activate 'nwrks' workers; calls TProof::SetParallel and,
///              when that reports a positive count, rebuilds the internal
///              lists.
/// Input: nwrks: number of workers requested.
/// Return: the value returned by TProof::SetParallel. A value > 0 is treated
///         as success (a warning is printed if it differs from 'nwrks');
///         -1 is returned if there is no PROOF session attached.
Int_t TProofNodes::ActivateWorkers(Int_t nwrks)
{
   // The constructor accepts a null session (Build() just warns), so guard
   // here instead of dereferencing a null pointer.
   if (!fProof) {
      Error("ActivateWorkers", "the PROOF instance is undefined! Cannot continue");
      return -1;
   }

   Int_t nw = fProof->SetParallel(nwrks);
   if (nw > 0) {
      if (nw != nwrks)
         Warning("ActivateWorkers", "requested %d got %d", nwrks, nw);
      // The set of active workers changed: refresh maps and counters
      Build();
   }
   return nw;
}
////////////////////////////////////////////////////////////////////////////////
/// Description: Activate the same number of workers on all nodes.
///              The argument is validated first; only then is the session
///              touched. Workers are visited node by node in list order: the
///              first n entries of each node are kept/activated and any
///              further active ones are deactivated. Finally the lists are
///              rebuilt and every node is checked to have exactly n active
///              workers.
/// Input: workers: string of the form "nx" where the non-negative integer n
///                 is the number of workers on each node to be activated.
/// Return: The number of active workers per node when the operation is
///         successful.
///         <0 otherwise (bad argument, no PROOF session, a failed
///         (de)activation request, or a node ending up with a different
///         number of active workers than requested).
Int_t TProofNodes::ActivateWorkers(const char *workers)
{
   // The constructor accepts a null session (Build() just warns), so guard
   // here instead of dereferencing a null pointer.
   if (!fProof) {
      Error("ActivateWorkers", "the PROOF instance is undefined! Cannot continue");
      return -1;
   }

   // Validate the request before changing anything in the session, so that a
   // malformed argument has no side effects.
   TString sworkers = TString(workers).Strip(TString::kTrailing, 'x');
   if (!sworkers.IsDigit()) {
      Error("ActivateWorkers", "wrongly formatted argument: %s - cannot continue",
            workers ? workers : "(null)");
      return -1;
   }
   Int_t nworkersnode = sworkers.Atoi();
   Int_t ret = nworkersnode;

   TString toactivate;
   TString todeactivate;

   // The TProof::ActivateWorker/TProof::DeactivateWorker functions were fixed /
   // improved starting with protocol version 33
   Bool_t protocol33 = kTRUE;
   if (fProof->GetRemoteProtocol() < 33 || fProof->GetClientProtocol() < 33) {
      protocol33 = kFALSE;
      // This resets the lists to avoid the problem fixed in protocol 33
      fProof->SetParallel(0);
   }

   // Make sure worker list is up-to-date
   Build();

   TSlaveInfo *si = nullptr;
   TList *node = nullptr;
   TObject *key = nullptr;
   TIter nxk(fNodes);
   while ((key = nxk()) != nullptr) {
      if ((node = dynamic_cast<TList *>(fNodes->GetValue(key)))) {
         TIter nxtworker(node);
         Int_t nactiveworkers = 0;
         while ((si = static_cast<TSlaveInfo *>(nxtworker()))) {
            if (nactiveworkers < nworkersnode) {
               // Still below the per-node quota: make sure this worker is on.
               // With protocol >= 33 the ordinals are batched into one
               // comma-separated request; otherwise each is sent right away.
               if (si->fStatus == TSlaveInfo::kNotActive) {
                  if (protocol33) {
                     toactivate += TString::Format("%s,", si->GetOrdinal());
                  } else {
                     fProof->ActivateWorker(si->GetOrdinal());
                  }
               }
               // Every worker seen while below the quota is counted, whatever
               // its status; the check after the rebuild catches shortfalls.
               nactiveworkers++;
            } else {
               // Quota reached: switch off any remaining active worker
               if (si->fStatus == TSlaveInfo::kActive) {
                  if (protocol33) {
                     todeactivate += TString::Format("%s,", si->GetOrdinal());
                  } else {
                     fProof->DeactivateWorker(si->GetOrdinal());
                  }
               }
            }
         }
      } else {
         Warning("ActivateWorkers", "could not get list for node '%s'", key->GetName());
      }
   }

   // Send the batched requests (only filled when protocol >= 33)
   if (!todeactivate.IsNull()) {
      todeactivate.Remove(TString::kTrailing, ',');
      if (fProof->DeactivateWorker(todeactivate) < 0) ret = -1;
   }
   if (!toactivate.IsNull()) {
      toactivate.Remove(TString::kTrailing, ',');
      if (fProof->ActivateWorker(toactivate) < 0) ret = -1;
   }
   if (ret < 0) {
      Warning("ActivateWorkers", "could not get the requested number of workers per node (%d)",
              nworkersnode);
      return ret;
   }

   // Rebuild
   Build();

   // Build() destroys fNodes so we need to re-create the iterator, resetting is not enough ...
   TIter nxkn(fNodes);
   while ((key = nxkn()) != nullptr) {
      if ((node = dynamic_cast<TList *>(fNodes->GetValue(key)))) {
         // Verify that this node ended up with exactly the requested count
         TIter nxtworker(node);
         Int_t nactiveworkers = 0;
         while ((si = static_cast<TSlaveInfo *>(nxtworker()))) {
            if (si->fStatus == TSlaveInfo::kActive) nactiveworkers++;
         }
         if (nactiveworkers != nworkersnode) {
            Warning("ActivateWorkers", "only %d (out of %d requested) workers "
                                       "were activated on node %s",
                    nactiveworkers, nworkersnode, node->GetName());
            ret = -1;
         }
      } else {
         Warning("ActivateWorkers", "could not get list for node '%s'", key->GetName());
      }
   }

   // Done
   return ret;
}
////////////////////////////////////////////////////////////////////////////////
/// Description: Print node information.
///              Walks the keys of the node map and prints the worker list
///              stored for each one; a warning is issued for any key whose
///              value is not a TList.
/// Input: option: forwarded unchanged to TList::Print for every node.
void TProofNodes::Print(Option_t* option) const
{
   TIter nextKey(fNodes);
   for (TObject *key = nextKey(); key != 0; key = nextKey()) {
      TList *workersOnNode = dynamic_cast<TList *>(fNodes->GetValue(key));
      if (!workersOnNode) {
         Warning("Print", "could not get list for node '%s'", key->GetName());
         continue;
      }
      workersOnNode->Print(option);
   }
}