-
Notifications
You must be signed in to change notification settings - Fork 5
/
index.cc
3831 lines (3439 loc) · 123 KB
/
index.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#define __STDC_FORMAT_MACROS 1
/// Debug verbosity level: 0 prints nothing, higher values enable the
/// DEBUG() statements tagged with a level up to and including it.
int debug = 0;

/// Print @a msg (a `<<`-chained stream expression) to std::cout,
/// prefixed with the level @a x, when the global #debug level is at
/// least @a x.  The output is flushed immediately.
///
/// The body is wrapped in `do { ... } while (0)` so the macro behaves
/// as a single statement: a bare `if` would silently capture an `else`
/// that follows a `DEBUG(...)` used as the body of an outer `if`.
#define DEBUG(x, msg) \
  do { if (debug >= (x)) std::cout << "DEBUG[" << (x) << "]: " << msg << std::flush; } while (0)
#include "DQM/StreamSample.pb.h"
#include "DQM/VisDQMIndex.h"
#include "DQM/VisDQMCache.h"
#include "DQM/VisDQMFile.h"
#include "DQM/VisDQMError.h"
#include "DQM/VisDQMTools.h"
#include "DQM/StringAtom.h"
#include "DQM/Standalone.h"
#include "DQM/DQMStore.h"
#include "DQM/MonitorElement.h"
#include "classlib/utils/Regexp.h"
#include "classlib/utils/RegexpMatch.h"
#include "classlib/utils/StringFormat.h"
#include "classlib/utils/StringOps.h"
#include "classlib/utils/DebugAids.h"
#include "classlib/utils/Error.h"
#include "classlib/iobase/Filename.h"
#include "classlib/iobase/FileError.h"
#include "classlib/iobase/File.h"
#include "classlib/zip/MD5Digest.h"
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include "TH1.h"
#include "TAxis.h"
#include "TROOT.h"
#include <cerrno>
#include <cstdlib>
#include <cfloat>
#include <iostream>
#include <fstream>
#include <list>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <float.h>
using namespace lat;
using google::protobuf::io::FileInputStream;
using google::protobuf::io::FileOutputStream;
using google::protobuf::io::GzipInputStream;
using google::protobuf::io::GzipOutputStream;
using google::protobuf::io::CodedInputStream;
using google::protobuf::io::ArrayInputStream;
using google::protobuf::io::StringOutputStream;
// ----------------------------------------------------------------------
/** Index task to perform. */
enum TaskType
{
TASK_CREATE, ///< Create and initialise a new index.
TASK_ADD, ///< Add data to an index.
TASK_REMOVE, ///< Remove data from the index.
TASK_MERGE, ///< Merge an index into another.
TASK_DUMP, ///< Dump the index contents.
TASK_STREAM, ///< Stream a sample from the index into an intermediate .dat file.
TASK_STREAMPB, ///< Stream a sample from the index into an intermediate ProtocolBuffer .pb file.
TASK_FIXSTREAMERS ///< Add missing streamerinfo to oldest ones.
};
/** Things the user can choose to dump out. */
enum DumpType
{
DUMP_CATALOGUE, ///< Dump the master catalogue.
DUMP_INFO, ///< Dump monitor element summary information.
DUMP_DATA, ///< Dump monitor element serialised data.
DUMP_ALL ///< Dump everything.
};
/** Classification of what to do with a monitor element; this is the
    return value of classifyMonitorElement(). */
enum MEClass
{
ME_CLASS_BAD, ///< Did not match expected naming conventions.
ME_CLASS_SKIP, ///< Not interesting or already handled some other way.
ME_CLASS_KEEP ///< Index this monitor element.
};
/** Classification of a sample to a broad class of DQM data types. */
enum DataType
{
TYPE_OTHER, ///< Type not recognised or not determined.
TYPE_DATA, ///< DQM data for real detector data.
TYPE_RELVAL, ///< DQM data for release validation simulated data.
TYPE_RELVAL_RUNDEPMC, ///< DQM data for release validation Run Dependent simulated data.
TYPE_MC, ///< DQM data for other simulated data.
TYPE_RUNDEPMC ///< DQM data for Run Dependent simulated data.
};
/** Compression levels accepted by CompressMessage(), which assigns the
    numeric value to GzipOutputStream::Options::compression_level. */
enum CompressionFactor
{
NO_COMPRESSION = 0,
FAST_COMPRESSION = 4,
DEFAULT_COMPRESSION = 6,
MAX_COMPRESSION = 9
};
/** Classification of DQM data into a unique DQM GUI "sample". */
struct SampleInfo
{
uint32_t index; ///< Index of this sample among new files.
int32_t runnr; ///< Run number; fileInfoFromName() sets 1 for relval/mc, verifyFileInfo() requires > 1 for data.
DataType type; ///< Broad type class of underlying data.
std::string dataset; ///< Full 3-part dataset name.
std::string version; ///< CMSSW version string for relval data.
};
/** Classification of input files to DQM samples. */
struct FileInfo
{
Filename path; ///< Path name of the input file (or file name of the ROOT file inside a zip archive).
Filename fullpath; ///< File name of the zip archive and ROOT filename.
Filename container; ///< File name of the zip archive, if any, or of the ROOT file.
SampleInfo *sample; ///< Sample classification of this file; null until a sample has been assigned (see verifyFileInfo()).
};
/** Information extracted from a monitor element and stored in the index.
    All fields are reset and then filled in by classifyMonitorElement(). */
struct MonitorElementInfo
{
/** Properties as bit flags (#VisDQMIndex::Summary::properties). */
uint32_t flags;
/** Detector tag (#VisDQMIndex::Summary::tag). */
uint32_t tag;
/** Pointer to the ROOT object ([0]) and to a reference object ([1]),
    if any.  Both are null for scalar monitor elements. */
TObject *object[2];
/** Index in the StringAtomTree holding the ROOT information to be
    stored in the index. */
size_t streamidx;
/** DQM sample sub-category: 0 for run, 1 for lumi section range summary.
    Used to create a part of the 64-bit key in the index. */
uint32_t category;
/** First lumi section for a lumi section range summary. Zero if
    this is not a lumi section range summary object. Ignored. */
uint32_t lumibegin;
/** Last lumi section for a lumi section range summary. Zero if
    this is not a lumi section range summary object. Used to
    create part of the 64-bit key in the index. */
uint32_t lumiend;
/** Full 64-bit key for this monitor element when stored into the
    index, constructed as per documentation in #VisDQMIndex. The
    value is filled in only when we are about to insert data into a
    file in the index. */
IndexKey nameidx;
/** Full path name of the monitor element, without the decorations
    added by #DQMStore when saving the objects in a ROOT file. This
    is used to create a part of the 64-bit key in the index. */
std::string name;
/** For scalar monitor elements the string representation of the
    value; stores only the value, without any of the decorations
    added by #DQMStore or #MonitorElement. Empty for non-scalar
    monitor elements. Stored in the summary immediately after
    the #VisDQMIndex::Summary object. */
std::string data;
/** The quality report results attached to this monitor element.
    Each result is a tuple of four strings terminated in a null
    character, with an extra null to terminate the tuple (note: this
    means the string has several embedded nulls!). The first tuple
    member is the quality report status (an integer formatted as a
    string), the second the quality report result (a real formatted
    as a string), the third the quality test name, and the fourth
    the quality test message. Stored in the summary after the
    scalar data, after the #VisDQMIndex::Summary object. */
std::string qreports;
/** ROOT histogram statistics: number of entries. */
double nentries;
/** ROOT histogram statistics: number of bins in x, y and z. */
uint32_t nbins[3];
/** ROOT histogram statistics: mean in x, y and z. */
double mean[3];
/** ROOT histogram statistics: standard deviation in x, y and z. */
double rms[3];
/** ROOT histogram statistics: min ([i][0]) and max ([i][1]) axis
    bounds in x, y and z. */
double bounds[3][2];
/** ROOT histogram statistics: number of bytes of serialised data. */
uint32_t ndata;
};
/** Comparison operator for arranging monitor elements by their output
order in the file, mainly by the name index in the table. */
class OrderByNameIndex
{
std::vector<MonitorElementInfo> &minfo_;
public:
OrderByNameIndex(std::vector<MonitorElementInfo> &minfo)
: minfo_(minfo)
{}
bool operator()(uint32_t a, uint32_t b)
{ return minfo_[a].nameidx < minfo_[b].nameidx; }
};
/// Name of this program for diagnostic messages.
Filename app;
/// Regular expression to recognise a stream file (name ending in ".dat").
Regexp rxstreamfile(".*\\.dat$");
/// Regular expression to recognise a Protocol Buffer stream file
/// (name ending in ".pb").
Regexp rxstreampbfile(".*\\.pb$");
/// Regular expression to recognise valid dataset names: exactly three
/// "/"-prefixed parts of letters, digits, '-' and '_'.
Regexp rxdataset("^(/[-A-Za-z0-9_]+){3}$");
/// Regular expression to recognise valid online "real data" DQM files.
/// The first capture is the run number string.
Regexp rxonline("^(?:.*/)?DQM_V\\d+(?:_[A-Za-z0-9]+)?_R(\\d+)\\.(dat|pb|root)$");
/// Regular expression to recognise valid offline DQM data files. The
/// first capture is the run number, the second the mangled dataset name
/// (with "__" in place of each "/").
Regexp rxoffline("^(?:.*/)?DQM_V\\d+_R(\\d+)((?:__[-A-Za-z0-9_]+){3})\\.(dat|pb|root)$");
/// Regular expression to recognise release validation dataset names.
/// The first capture is the CMSSW release string.
Regexp rxrelval("^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*$");
/// Regular expression to recognise Run Dependent Monte Carlo samples
/// that are not RelVal.  This pattern has no capture groups; the
/// leading (?!RelVal) is a negative lookahead.
Regexp rxrundepmc("^/(?!RelVal)[^/]+/.*rundepMC.*$");
/// Regular expression to recognise release validation Run Dependent
/// Monte Carlo samples. The first capture is the CMSSW release
/// string.
Regexp rxrelvalrundepmc("^/RelVal[^/]+/(CMSSW(?:_[0-9]+)+(?:_pre[0-9]+)?)[-_].*rundepMC.*$");
/// Boundary marker string for monitor element info sections.
/// NOTE(review): the code that uses it is outside this view.
static const std::string MEINFOBOUNDARY("____MEINFOBOUNDARY____");
/// Boundary marker string for monitor element ROOT data sections.
/// NOTE(review): the code that uses it is outside this view.
static const std::string MEROOTBOUNDARY("____MEROOTBOUNDARY____");
/// Sentinel value with all bits set, presumably meaning "every sample"
/// rather than one specific sample index -- confirm against the users.
static const size_t ALL_SAMPLES = ~(size_t)0;
/// Sentinel value with all bits set, presumably meaning "the last
/// streamer" rather than one specific index -- confirm against the users.
static const size_t LAST_STREAMER = ~(size_t)0;
// ----------------------------------------------------------------------
/** Utility function to round @a value up to the nearest multiple of
    @a unit.

    @param value  Value to round up.
    @param unit   Rounding granularity.  A @a unit of zero leaves
                  @a value unchanged instead of dividing by zero.
    @return The smallest multiple of @a unit that is not less than
            @a value.  If that multiple does not fit in 32 bits the
            result wraps modulo 2^32. */
static inline uint32_t
myroundup(uint32_t value, uint32_t unit)
{
  if (unit == 0)
    return value;

  // Work from the remainder rather than "value + unit - 1": that sum
  // can overflow even when the rounded result itself is representable.
  const uint32_t rem = value % unit;
  return rem == 0 ? value : value + (unit - rem);
}
/** Return true if the whole of @a filename matches #rxstreamfile,
    i.e. the name ends in ".dat". */
static bool isStreamFile(const char* filename)
{
  const bool matched = rxstreamfile.exactMatch(filename);
  return matched;
}
/** Return true if the whole of @a filename matches #rxstreampbfile,
    i.e. the name ends in ".pb". */
static bool isStreamPBFile(const char* filename)
{
  const bool matched = rxstreampbfile.exactMatch(filename);
  return matched;
}
/** Utility function to read a double from a stream.

    Extracts the next whitespace-delimited token from @a iread and
    converts it with strtod().  If no token can be read, or the token
    is not numeric, @a *into is set to 0.

    @param iread  Stream to read the token from.
    @param into   Receives the converted value; must not be null. */
static inline void readDouble(std::ifstream &iread, double *into)
{
  std::string token;
  iread >> token;

  const char *text = token.c_str();
  *into = strtod(text, 0);
}
/** Utility function to write a double into a stream.

    Writes @a prefix immediately followed by @a val formatted with
    "%.*g" at DBL_DIG+2 significant digits.

    @param iwrite  Stream to write to.
    @param prefix  Text emitted before the number; must not be null.
                   It may be of any length.
    @param val     Value to write. */
static inline void writeDouble(std::ofstream &iwrite,
                               const char *prefix,
                               double val)
{
  // Only the number goes through the fixed-size buffer.  Formatting the
  // prefix into the same buffer would silently truncate the output for
  // long prefixes.
  char num[64];
  snprintf(num, sizeof(num), "%.*g", DBL_DIG+2, val);

  // Emit prefix and number with a single insertion, as before.
  std::string out(prefix);
  out += num;
  iwrite << out;
}
template<class OriginalMessage, class CompressedMessage>
void CompressMessage(const OriginalMessage& source,
CompressionFactor k,
CompressedMessage* dest) {
std::string result;
{
StringOutputStream output(&result);
GzipOutputStream::Options options;
options.format = GzipOutputStream::GZIP;
options.compression_level = k;
GzipOutputStream gzout(&output,
options);
source.SerializeToZeroCopyStream(&gzout);
gzout.Flush();
}
dest->set_size(result.size());
dest->set_buff(result);
}
template<class CompressedMessage, class Message>
void UncompressMessage(const CompressedMessage& source, Message* dest) {
const int size = source.size();
ArrayInputStream input(source.buff().data(), size);
GzipInputStream gzin(&input);
CodedInputStream input_coded(&gzin);
input_coded.SetTotalBytesLimit(512*1024*1024, -1);
if (!dest->ParseFromCodedStream(&input_coded))
throw VisDQMError(0, "UncompressMessage","failed to uncompress message");
}
// ----------------------------------------------------------------------
/** Classify and extract monitor element object properties from @a obj
    belonging to sample @a si.  The result is stored into @a info.

    @param obj   Monitor element to inspect.
    @param info  Reset on entry, then filled in with name, flags, tag,
                 quality reports, scalar value or ROOT objects and
                 histogram statistics.
    @param meta  Sample metadata.  The iLumiSection, processTimeStamp,
                 processedEvents and runStartTimeStamp scalars found
                 under ".../EventInfo/" raise the corresponding field
                 to the maximum value seen.
    @param si    Sample the element is expected to belong to; used to
                 validate the run number embedded in the element name.

    @return #ME_CLASS_KEEP if the element should be stored into the DQM
            index; #ME_CLASS_SKIP if it is uninteresting ("ReleaseTag")
            or handled elsewhere (references); #ME_CLASS_BAD if it does
            not follow the expected naming conventions, has an invalid
            kind, or its run number disagrees with @a si. */
static MEClass
classifyMonitorElement(DQMStore & /* store */,
                       MonitorElement &obj,
                       MonitorElementInfo &info,
                       VisDQMIndex::Sample &meta,
                       const SampleInfo &si)
{
  // Start from a clean slate so nothing leaks in from a previous use
  // of the same info object.
  info.flags = 0;
  info.tag = 0;
  info.streamidx = 0;
  info.object[0] = 0;
  info.object[1] = 0;
  info.category = 0;
  info.lumibegin = 0;
  info.lumiend = 0;
  info.nameidx = IndexKey();
  info.name.clear();
  info.data.clear();
  info.qreports.clear();
  info.nentries = 0;
  for (int i = 0; i < 3; ++i)
  {
    info.nbins[i] = 0;
    info.mean[i] = 0;
    info.rms[i] = 0;
    info.bounds[i][0] = 0;
    info.bounds[i][1] = 0;
  }
  info.ndata = 0;

  // Skip references, they are handled when reading in data.
  const std::string &name = obj.getFullname();
  if (name.size() > 10 && name.compare(0, 10, "Reference/") == 0)
    return ME_CLASS_SKIP;

  // Validate "Run XYZ/System/Category/Name" format where category is
  // either "Run summary" or "By Lumi Section <N>-<M>".  Ignore buggy
  // "Run XYZ/Reference/Name" entries.  The three positions are those
  // of the first, second and third '/' in the name.
  size_t slash, sys, cat;
  if (name.compare(0, 4, "Run ") == 0
      && (slash = name.find('/')) < name.size()-1
      && (sys = name.find('/', slash+1)) < name.size()-1
      && (cat = name.find('/', sys+1)) < name.size()-1)
  {
    if (name.compare(slash+1, 10, "Reference/") == 0)
      return ME_CLASS_SKIP;

    // Parse the run number.  strtol() only reports overflow of long,
    // so also reject values that do not survive the narrowing to
    // int32_t rather than letting them wrap to a plausible run number.
    errno = 0;
    char *end = 0;
    const long parsedrun = strtol(name.c_str()+4, &end, 10);
    const int32_t runnr = static_cast<int32_t>(parsedrun);
    if (errno != 0 || !end || *end != '/' || runnr < 0
        || static_cast<long>(runnr) != parsedrun)
      return ME_CLASS_BAD;

    // The run number in the name must agree with the sample type.
    if (si.type == TYPE_DATA && si.runnr != runnr)
      return ME_CLASS_BAD;

    if ((si.type == TYPE_RELVAL || si.type == TYPE_MC) && runnr != 1)
      return ME_CLASS_BAD;

    if ((si.type == TYPE_RELVAL_RUNDEPMC || si.type == TYPE_RUNDEPMC)
        && runnr == 1)
      return ME_CLASS_BAD;

    std::string category(name, sys+1, cat-sys-1);
    if (category == "Run summary")
      info.category = 0;
    else if (category.compare(0, 16, "By Lumi Section ") == 0)
    {
      info.category = 1;

      end = 0;
      errno = 0;
      info.lumibegin = strtoul(category.c_str()+16, &end, 10);
      // isdigit() is only defined for values representable as
      // unsigned char (or EOF), so convert through unsigned char.
      if (errno != 0 || !end || *end != '-'
          || ! isdigit(static_cast<unsigned char>(end[1])))
        return ME_CLASS_BAD;

      info.lumiend = strtoul(end+1, &end, 10);
      if (errno != 0 || !end || *end)
        return ME_CLASS_BAD;
    }
    else
      return ME_CLASS_BAD;
  }
  else if (name == "ReleaseTag")
    return ME_CLASS_SKIP;
  else
    return ME_CLASS_BAD;

  // Now extract all data into "info".  Save the undecorated full
  // path ("System/Name", i.e. without the run and category parts),
  // any detector tag, quality test results, and various flags.
  info.name.reserve(name.size());
  info.name.append(name, slash+1, sys-slash-1);
  info.name.append(1, '/');
  info.name.append(name, cat+1, std::string::npos);

  if (uint32_t tag = obj.getTag())
  {
    info.tag = tag;
    info.flags |= VisDQMIndex::SUMMARY_PROP_TAGGED;
  }

  obj.packQualityData(info.qreports);
  if (obj.hasError())
    info.flags |= VisDQMIndex::SUMMARY_PROP_REPORT_ERROR;
  if (obj.hasWarning())
    info.flags |= VisDQMIndex::SUMMARY_PROP_REPORT_WARN;
  if (obj.hasOtherReport())
    info.flags |= VisDQMIndex::SUMMARY_PROP_REPORT_OTHER;
  if (obj.isEfficiency())
    info.flags |= VisDQMIndex::SUMMARY_PROP_EFFICIENCY_PLOT;

  // Extract the actual object value and fill in type flag in
  // properties.  Save scalar value as a string - just the value not
  // the decorations normally added by DQMStore or MonitorElement -
  // and extract the ROOT object, any reference and high-level
  // statistics for histogram type objects.
  switch (obj.kind())
  {
  default:
    ASSERT(false);
    return ME_CLASS_BAD;

  case MonitorElement::DQM_KIND_INVALID:
    info.flags |= obj.kind();
    return ME_CLASS_BAD;

  case MonitorElement::DQM_KIND_INT:
  case MonitorElement::DQM_KIND_REAL:
  case MonitorElement::DQM_KIND_STRING:
    info.flags |= obj.kind();
    obj.packScalarData(info.data, "");

    // Well-known EventInfo scalars feed the sample metadata.  The time
    // stamps are scaled by 1e9 (presumably seconds to nanoseconds).
    if (info.name.size() > 23
        && info.name.compare(info.name.size()-23, 23, "/EventInfo/iLumiSection") == 0)
      meta.numLumiSections = std::max(meta.numLumiSections, uint64_t(atoll(info.data.c_str())));
    else if (info.name.size() > 27
             && info.name.compare(info.name.size()-27, 27, "/EventInfo/processTimeStamp") == 0)
      meta.processedTime = std::max(meta.processedTime, uint64_t(atof(info.data.c_str())*1e9));
    else if (info.name.size() > 26
             && info.name.compare(info.name.size()-26, 26, "/EventInfo/processedEvents") == 0)
      meta.numEvents = std::max(meta.numEvents, uint64_t(atoll(info.data.c_str())));
    else if (info.name.size() > 28
             && info.name.compare(info.name.size()-28, 28, "/EventInfo/runStartTimeStamp") == 0)
      meta.runStartTime = std::max(meta.runStartTime, uint64_t(atof(info.data.c_str())*1e9));
    break;

  case MonitorElement::DQM_KIND_TH1F:
  case MonitorElement::DQM_KIND_TH1S:
  case MonitorElement::DQM_KIND_TH1I:
  case MonitorElement::DQM_KIND_TH1D:
  case MonitorElement::DQM_KIND_TH2F:
  case MonitorElement::DQM_KIND_TH2S:
  case MonitorElement::DQM_KIND_TH2D:
  case MonitorElement::DQM_KIND_TH2I:
  case MonitorElement::DQM_KIND_TH3F:
  case MonitorElement::DQM_KIND_TPROFILE:
  case MonitorElement::DQM_KIND_TPROFILE2D:
    info.flags |= obj.kind();
    info.object[0] = obj.getRootObject();
    if ((info.object[1] = obj.getRefRootObject()))
      info.flags |= VisDQMIndex::SUMMARY_PROP_HAS_REFERENCE;

    if (TH1 *h = dynamic_cast<TH1 *>(info.object[0]))
    {
      info.nentries = h->GetEntries();
      info.nbins[0] = h->GetNbinsX();
      info.nbins[1] = h->GetNbinsY();
      info.nbins[2] = h->GetNbinsZ();

      if (TAxis *a = h->GetXaxis())
      {
        info.mean[0] = h->GetMean(1);
        info.rms[0] = h->GetRMS(1);
        info.bounds[0][0] = a->GetXmin();
        info.bounds[0][1] = a->GetXmax();
      }

      if (TAxis *a = h->GetYaxis())
      {
        info.mean[1] = h->GetMean(2);
        info.rms[1] = h->GetRMS(2);
        info.bounds[1][0] = a->GetXmin();
        info.bounds[1][1] = a->GetXmax();
      }

      if (TAxis *a = h->GetZaxis())
      {
        info.mean[2] = h->GetMean(3);
        info.rms[2] = h->GetRMS(3);
        info.bounds[2][0] = a->GetXmin();
        info.bounds[2][1] = a->GetXmax();
      }
    }
    break;
  }

  // We are done, tell caller to keep this object.
  return ME_CLASS_KEEP;
}
/** Work item for classifyMonitorElementRange(): classify the monitor
    elements mes[begin, end) of one input file and collect the results.
    The three references must outlive the task. */
struct MEClassifyTask
{
  FileInfo &fi;                         ///< Input file the monitor elements came from.
  DQMStore &store;                      ///< Store passed through to classifyMonitorElement().
  std::vector<MonitorElement *> &mes;   ///< All monitor elements; this task handles [begin, end).
  std::vector<MonitorElementInfo> minfo;///< Info for every element classified as ME_CLASS_KEEP.
  VisDQMIndex::Sample meta;             ///< Sample metadata updated by classifyMonitorElement().
  int nerrors;                          ///< Number of elements counted as errors.
  pthread_t thread;                     ///< Thread handle; not set here (presumably by whoever spawns the worker).
  size_t begin;                         ///< First index into mes handled by this task.
  size_t end;                           ///< One past the last index into mes handled by this task.

  /** Bind the task to its inputs.  The counters and the range start
      out as zero and @c meta is value-initialised, so a freshly built
      task never exposes indeterminate values to code that increments
      or compares them. */
  MEClassifyTask(FileInfo &f, DQMStore &s, std::vector<MonitorElement*> &m)
    : fi(f), store(s), mes(m), meta(), nerrors(0), begin(0), end(0)
  {}
};
/** Thread entry point: classify the monitor elements in the range
    [begin, end) of the #MEClassifyTask passed in @a arg.

    Elements classified as #ME_CLASS_KEEP are appended to the task's
    @c minfo.  Elements classified as #ME_CLASS_BAD are reported on
    std::cerr and counted in @c nerrors.  Elements classified as
    #ME_CLASS_SKIP are dropped silently (apart from debug output) and
    are not counted as errors, since skipping is the expected outcome
    for references and other uninteresting objects.

    @param arg  Pointer to the #MEClassifyTask to process.
    @return Always null. */
static void *
classifyMonitorElementRange(void *arg)
{
  MEClassifyTask &task = *(MEClassifyTask *) arg;

  DEBUG(1, "classifying range [" << task.begin << ", " << task.end
        << ") in thread " << std::hex << (unsigned long) pthread_self()
        << std::dec << "\n");

  for (size_t m = task.begin, e = task.end; m < e; ++m)
  {
    MonitorElementInfo info;
    switch (classifyMonitorElement(task.store, *task.mes[m], info,
                                   task.meta, *task.fi.sample))
    {
    case ME_CLASS_BAD:
      task.nerrors++;
      std::cerr << task.fi.path.name()
                << ": " << task.mes[m]->getFullname()
                << ": warning: monitor element not recognised\n";
      continue;

    case ME_CLASS_SKIP:
      // Deliberately skipped objects are not errors.
      DEBUG(3, "skipping monitor element '"
            << task.mes[m]->getFullname() << "'\n");
      continue;

    case ME_CLASS_KEEP:
      DEBUG(3, ""
            << "flags=" << std::hex << info.flags << std::dec
            << " tag=" << info.tag
            << " object[0]=" << (void *) info.object[0]
            << " object[1]=" << (void *) info.object[1]
            << " category=" << info.category
            << " lumi=" << info.lumibegin << ':' << info.lumiend
            << " name='" << info.name
            << "' data='" << info.data
            << "' qreports='" << info.qreports
            << "' nentries=" << std::setprecision(DBL_DIG+2) << info.nentries
            << " nbins=" << info.nbins[0]
            << "/" << info.nbins[1]
            << "/" << info.nbins[2]
            << " mean=" << info.mean[0]
            << "/" << info.mean[1]
            << "/" << info.mean[2]
            << " rms=" << info.rms[0]
            << "/" << info.rms[1]
            << "/" << info.rms[2]
            << " bounds=[" << info.bounds[0][0] << ":" << info.bounds[0][1]
            << ", " << info.bounds[1][0] << ":" << info.bounds[1][1]
            << ", " << info.bounds[2][0] << ":" << info.bounds[2][1]
            << "]\n");
      task.minfo.push_back(info);
      break;
    }
  }

  return 0;
}
// ----------------------------------------------------------------------
/** Extract a numeric run number from a regexp string match into @a
    si.  Returns true on success, false on failure.  In case of
    failure an error has already been printed out.

    Note that the indexing uses only the run number from the file name
    to classify the data into a sample.  The data inside the file must
    match the run number and this is verified automatically.  No new
    sample classification occurs at per monitor element level.

    @param fi       File whose path name was matched; used for the
                    match text and for diagnostics.
    @param si       Receives the run number in si.runnr.  The field is
                    written even when the function fails.
    @param m        Regexp match result against fi.path.name().
    @param matchnr  Index of the capture holding the run number.
    @return true if the capture is a non-negative decimal number that
            fits in si.runnr, false otherwise. */
static bool
extractRunNumber(FileInfo &fi, SampleInfo &si, RegexpMatch &m, int matchnr)
{
  char *end = 0;
  std::string runnr = m.matchString(fi.path.name(), matchnr);

  errno = 0;
  const long parsed = strtol(runnr.c_str(), &end, 10);
  int err = errno;
  si.runnr = static_cast<int32_t>(parsed);

  // strtol() only reports overflow of long.  Where long is wider than
  // 32 bits an oversized run number would otherwise be silently
  // truncated to some unrelated value, so treat it as a range error.
  if (err == 0 && static_cast<long>(si.runnr) != parsed)
    err = ERANGE;

  if (err > 0)
  {
    std::cerr << fi.path.name() << ": error extracting run number: "
              << strerror(err) << " (error code " << err << ")\n";
    return false;
  }
  else if (! end || *end || si.runnr < 0)
  {
    std::cerr << fi.path.name() << ": unknown error extracting run number\n";
    return false;
  }

  return true;
}
/** Guess data file parameters from the file name and classify the
    file accordingly.

    Online files are recognised by #rxonline.  They require that the
    caller has already provided si.dataset, as online does not have
    datasets as such.  The run number is extracted into si.runnr and
    si.type is set to #TYPE_DATA.

    Offline files are recognised by #rxoffline.  The dataset name is
    recovered from the file name into si.dataset (each "__" becomes
    "/") and the run number is extracted into si.runnr.  Then:

    - run number greater than one: if the dataset matches
      #rxrelvalrundepmc, si.type becomes #TYPE_RELVAL_RUNDEPMC and
      si.version the CMSSW version; else if it matches #rxrundepmc,
      si.type becomes #TYPE_RUNDEPMC; otherwise #TYPE_DATA.
    - run number zero or one: si.runnr is forced to 1; if the dataset
      matches #rxrelval, si.type becomes #TYPE_RELVAL and si.version
      the CMSSW version, otherwise si.type becomes #TYPE_MC.

    Returns true on a successful match and false on failure; @a si may
    already have been partially modified.  On failure an error message
    has already been printed out.

    Note that fi.sample will not be set.  It is assumed the caller
    will locate a sample object that matches @a si. */
static bool
fileInfoFromName(FileInfo &fi, SampleInfo &si)
{
  RegexpMatch m;

  // Online naming convention: a dataset name must have been provided
  // from outside; only the run number comes from the file name.
  if (rxonline.match(fi.path.name(), 0, 0, &m))
  {
    if (si.dataset.empty())
    {
      std::cerr << fi.path.name()
                << ": dataset name required for online files\n";
      return false;
    }

    if (! extractRunNumber(fi, si, m, 1))
      return false;

    si.type = TYPE_DATA;
    return true;
  }

  // Anything that is not offline either is not a valid file name.
  if (! rxoffline.match(fi.path.name(), 0, 0, &m))
  {
    std::cerr << fi.path.name()
              << ": cannot determine file properties from name\n";
    return false;
  }

  // Offline naming convention: both the dataset name and the run
  // number come from the file name.
  si.dataset = StringOps::replace(m.matchString(fi.path.name(), 2).c_str(),
                                  "__", "/");
  if (! extractRunNumber(fi, si, m, 1))
    return false;

  ASSERT(si.runnr >= 0);

  // A real run number: run dependent simulation or detector data.
  if (si.runnr > 1)
  {
    if (rxrelvalrundepmc.match(si.dataset, 0, 0, &m))
    {
      si.type = TYPE_RELVAL_RUNDEPMC;
      si.version = m.matchString(si.dataset, 1);
    }
    else if (rxrundepmc.match(si.dataset, 0, 0, &m))
      si.type = TYPE_RUNDEPMC;
    else
      si.type = TYPE_DATA;

    return true;
  }

  // Run number zero or one: release validation or other simulation,
  // both normalised to run number one.
  si.runnr = 1;
  if (rxrelval.match(si.dataset, 0, 0, &m))
  {
    si.type = TYPE_RELVAL;
    si.version = m.matchString(si.dataset, 1);
  }
  else
    si.type = TYPE_MC;

  return true;
}
/** Verify that all file parameters have been set or deduced correctly.

    Checks that the file has a non-empty path and an assigned sample,
    that the sample's dataset name is valid (#rxdataset), and that the
    run number, version string and file naming convention are
    consistent with the sample's type.

    @param fi  File information to check; fi.sample may be null, which
               is reported as an error.
    @return true on success and false on failure.  On failure an error
            message has already been printed out. */
static bool
verifyFileInfo(const FileInfo &fi)
{
  RegexpMatch m;

  // Check we have correctly assigned a sample to the file name.
  if (fi.path.empty())
  {
    std::cerr << "empty file name not permitted\n";
    return false;
  }

  if (! fi.sample)
  {
    std::cerr << fi.path.name()
              << ": file has not been classified to any sample\n";
    return false;
  }

  SampleInfo &si = *fi.sample;
  if (! rxdataset.exactMatch(si.dataset))
  {
    std::cerr << fi.path.name() << ": dataset '"
              << si.dataset << "' not valid\n";
    return false;
  }

  // Match file name against various conventions.
  bool online = rxonline.exactMatch(fi.path.name());
  bool offline = rxoffline.exactMatch(fi.path.name());

  switch (si.type)
  {
  case TYPE_OTHER:
    // Unclassified data is not permitted.
    std::cerr << fi.path.name() << ": file type not deduced\n";
    return false;

  case TYPE_DATA:
    // Check that data classified as "real" data matches expected
    // conventions: either online, or offline with a real run number
    // and dataset name for real data.
    if (si.runnr <= 1)
    {
      std::cerr << fi.path.name()
                << ": file type is 'data' but run number <= 1\n";
      return false;
    }
    if (! si.version.empty())
    {
      std::cerr << fi.path.name()
                << ": file type is 'data' but it has non-empty version '"
                << si.version << "' (only relval can have a version)\n";
      return false;
    }
    if (! online && ! offline)
    {
      std::cerr << fi.path.name()
                << ": file type is 'data' but file name does not match"
                << " known conventions\n";
      return false;
    }
    if (offline && si.dataset.compare(0, 7, "/RelVal") == 0)
    {
      std::cerr << fi.path.name()
                << ": file type is 'data' but file name is relval\n";
      return false;
    }
    return true;

  case TYPE_RELVAL:
  case TYPE_RELVAL_RUNDEPMC:
    // Check that data classified as a relval matched expected
    // conventions: offline with a dataset, cmssw version name was
    // found, and dataset name matches relval convention.  A run
    // number other than one is only accepted when the dataset is a
    // release validation Run Dependent MC sample.
    if (si.runnr != 1 && !rxrelvalrundepmc.match(si.dataset, 0, 0, &m))
    {
      std::cerr << fi.path.name()
                << ": file type is 'relval' but run number is non-zero ("
                << si.runnr << ") and sample is not a valid Run Dependent MC.\n";
      return false;
    }
    if (si.version.empty())
    {
      std::cerr << fi.path.name()
                << ": file type is 'relval' but version is empty\n";
      return false;
    }
    if (! offline)
    {
      std::cerr << fi.path.name()
                << ": file type is 'relval' but file name does not match"
                << " expected convention\n";
      return false;
    }
    if (! rxrelval.exactMatch(si.dataset))
    {
      std::cerr << fi.path.name()
                << ": file type is 'relval' but dataset name '" << si.dataset
                << "' does not match release validation convention\n";
      return false;
    }
    return true;

  case TYPE_MC:
    // Check that data classified as simulation data matched expected
    // conventions: offline with a dataset name, and it wasn't relval.
    if (si.runnr != 1)
    {
      std::cerr << fi.path.name()
                << ": file type is 'mc' but run number is non-zero ("
                << si.runnr << ")\n";
      return false;
    }
    if (! si.version.empty())
    {
      std::cerr << fi.path.name()
                << ": file type is 'mc' but it has non-empty version '"
                << si.version << "' (only relval can have a version)\n";
      return false;
    }
    if (online || ! offline)
    {
      std::cerr << fi.path.name()
                << ": file type is 'mc' but file name does not match"
                << " expected convention\n";
      return false;
    }
    if (offline && si.dataset.compare(0, 7, "/RelVal") == 0)
    {
      std::cerr << fi.path.name()
                << ": file type is 'mc' but file name is relval\n";
      return false;
    }
    return true;

  case TYPE_RUNDEPMC:
    // Check that data classified as Run Dependent simulation data
    // matched expected conventions: offline with a dataset name, it
    // wasn't relval, and the run number is not 1.  The diagnostics
    // name the type as 'rundep_mc' so they cannot be mistaken for
    // those of the plain 'mc' case above.
    if (si.runnr == 1)
    {
      std::cerr << fi.path.name()
                << ": file type is 'rundep_mc' but run number is 1\n";
      return false;
    }
    if (! si.version.empty())
    {
      std::cerr << fi.path.name()
                << ": file type is 'rundep_mc' but it has non-empty version '"
                << si.version << "' (only relval can have a version)\n";
      return false;
    }
    if (online || ! offline)
    {
      std::cerr << fi.path.name()
                << ": file type is 'rundep_mc' but file name does not match"
                << " expected convention\n";
      return false;
    }
    if (offline && si.dataset.compare(0, 7, "/RelVal") == 0)
    {
      std::cerr << fi.path.name()
                << ": file type is 'rundep_mc' but file name is relval\n";
      return false;
    }
    return true;

  default:
    // Can't happen
    ASSERT(false);
    return false;
  }
}
// ----------------------------------------------------------------------
/** Initialise a DQM GUI index. This amounts to creating the
directory structure and initialising empty index files. */
static int
initIndex(const Filename &indexdir)
{
DEBUG(1, "creating index directory " << indexdir << '\n');
Filename::makedir(indexdir, 0755, true, true);
DEBUG(1, "initialising index\n");
VisDQMIndex ix(indexdir);
ix.initialise();
return EXIT_SUCCESS;
}
// ----------------------------------------------------------------------
/** Remove monitor element data to sample data files. */
static void
contract(VisDQMIndex &ix,
VisDQMIndex::Sample &s,
uint64_t nsample,
std::list<Filename> &oldfiles,
std::list<Filename> &newfiles)
{
// Copy data to summary and data files, keeping data in key order.
// If we find a match against NSAMPLE, then skip the copy.
for (int kind = 1; kind >= 0; --kind)
{
VisDQMFile *rfile = ix.open(VisDQMIndex::MASTER_FILE_INFO + kind,
s.files[kind] >> 16,
s.files[kind] & 0xffff,
VisDQMFile::OPEN_READ);
VisDQMFile *wfile = ix.open(VisDQMIndex::MASTER_FILE_INFO + kind,
s.files[kind] >> 16,
(s.files[kind] & 0xffff)
+ (rfile ? 1 : 0),
VisDQMFile::OPEN_WRITE);
DEBUG(1, "writing out new data file " << kind
<< ": in [" << (s.files[kind] >> 16)
<< ':' << (s.files[kind] & 0xffff)
<< "]=" << (rfile ? rfile->path().name() : "(none)")
<< " out " << wfile->path() << '\n');
VisDQMFile::ReadHead rdhead(rfile, IndexKey(0, 0));
VisDQMFile::WriteHead wrhead(wfile);
IndexKey begin(nsample, 0, 0, 0);
IndexKey end(nsample+1, 0, 0, 0);
IndexKey rkey;
void *rstart;
void *rend;
// Transfer keys we are not deleting.
DEBUG(2, "keeping keys up to " << std::hex << begin << std::dec << '\n');
wrhead.xfer(rdhead, begin, &rkey, &rstart, &rend);
// Skip keys until we reach the next sample.
while (! rdhead.isdone() && rkey < end)
{
DEBUG(2, "removing object key " << std::hex << rkey << std::dec << '\n');
rdhead.next();
if (! rdhead.isdone())
rdhead.get(&rkey, &rstart, &rend);
}