forked from Normation/rudder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ExecutionBatch.scala
1303 lines (1153 loc) · 63.4 KB
/
ExecutionBatch.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
*************************************************************************************
* Copyright 2011 Normation SAS
*************************************************************************************
*
* This file is part of Rudder.
*
* Rudder is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* In accordance with the terms of section 7 (7. Additional Terms.) of
* the GNU General Public License version 3, the copyright holders add
* the following Additional permissions:
* Notwithstanding to the terms of section 5 (5. Conveying Modified Source
* Versions) and 6 (6. Conveying Non-Source Forms.) of the GNU General
* Public License version 3, when you create a Related Module, this
* Related Module is not considered as a part of the work and may be
* distributed under the license agreement of your choice.
* A "Related Module" means a set of sources files including their
* documentation that, without modification of the Source Code, enables
* supplementary functions or services in addition to those offered by
* the Software.
*
* Rudder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Rudder. If not, see <http://www.gnu.org/licenses/>.
*
*************************************************************************************
*/
package com.normation.rudder.services.reports
import com.normation.inventory.domain.NodeId
import org.joda.time._
import com.normation.rudder.domain.logger.ComplianceDebugLogger
import com.normation.rudder.domain.logger.ComplianceDebugLogger._
import com.normation.rudder.domain.logger.TimingDebugLogger
import com.normation.inventory.domain.NodeId
import com.normation.rudder.domain.reports._
import com.normation.rudder.reports._
import com.normation.rudder.reports.execution.AgentRunId
import net.liftweb.common.Loggable
import java.util.regex.Pattern
import com.normation.rudder.domain.policies.PolicyMode
import com.normation.rudder.domain.reports.ReportType.BadPolicyMode
import com.normation.rudder.reports.execution.AgentRunWithNodeConfig
import com.normation.rudder.domain.policies.RuleId
/*
* we want to retrieve for each node the expected reports that match its LAST
* run, or if there is no last run, what we are waiting for.
*
* So the general resolution for ONE node is:
* - 1/ context
* - get the last run
* - get the list of nodeConfigIdInfo
*
* - 2/ get the config id and date
* - 2.1: from the run:
* - if we have it, find the option[date] matching that version
* - 2.2: else (no run), we get the last available from nodeinfo with date
* - if none, get the last expected reports for the node, but not configId/date for it
* - so, we have a configId and an option[date] of validity
* => get expectedReports
*
* - now, get reports for the run if any, and compute merge.
*
*
* For a node, we will get:
*
* - run date:
* - Some(t): the node at least talked to us!
* - None : never talked, build a pending/noreport list
* - (configId, eol of that config):
* - Some(id): we know what we expect!
* - Some(date): we know the validity !: the simplest case
* - None: this is an unexpected version (no meta info on it). Error.
* - None: migration only, that case will disappear with time. Compat mode,
* consider expected report for the node are ok. Don't care of the (None) date.
*
*/
/*
 * Root of the ADT describing what is known, for one node, about its last
 * run and about the configuration it is expected to report on.
 */
sealed trait RunAndConfigInfo
// error family: no expected configuration data is available for the node
sealed trait ErrorNoConfigData extends RunAndConfigInfo
// family of all the cases where an expected configuration is known
sealed trait ExpectedConfigAvailable extends RunAndConfigInfo {
// the expected reports (configuration) known for the node
def expectedConfig: NodeExpectedReports
}
// an expected configuration is known, but there is no report to compare
// with it (see NoReportInInterval, ReportsDisabledInInterval, Pending)
sealed trait NoReport extends ExpectedConfigAvailable
// an expected configuration is known, and the last run does not match it
// (see the Unexpected* case classes)
sealed trait Unexpected extends ExpectedConfigAvailable
// an expected configuration is known, and the last run is for it
// (see ComputeCompliance)
sealed trait Ok extends ExpectedConfigAvailable
// A marker trait which indicates that a last run exists
// and that we want to have the details of that run.
sealed trait LastRunAvailable extends RunAndConfigInfo {
// This is the date and time reported by the run
// (local node time of the start of the run).
def lastRunDateTime: DateTime
// This is not an option, even if we must take care of nodes
// upgrading from a previous version of Rudder without
// config id, because we must have some logic to find WHAT
// is the applicable version in all cases - it's one of the
// major goals of ExecutionBatch#computeNodesRunInfo.
def lastRunConfigId: NodeConfigId
// Most of the time, we do have the corresponding
// configId in base. But not always, for example
// if the node configId was corrupted.
def lastRunConfigInfo: Option[NodeExpectedReports]
}
/**
 * A run/config information which carries an expiration date.
 * It is mixed in by Pending and ComputeCompliance.
 * NOTE(review): presumably the status must be recomputed once
 * `expirationDateTime` is reached - confirm with callers.
 */
sealed trait ExpiringStatus extends RunAndConfigInfo {
// the date and time at which that status expires
def expirationDateTime: DateTime
}
/*
 * Neither a run nor an expected configuration is known for the node.
 * Really, does that node exist?
 */
final case object NoRunNoExpectedReport extends ErrorNoConfigData
/*
 * We have a run, but we don't have the needed configId in the
 * expected table. Either we don't have any config id at all,
 * or we can't find the version matching the run.
 * (some weird data loss in the server, or a node
 * not yet initialized)
 */
final case class NoExpectedReport(
// date and time of the run
lastRunDateTime: DateTime
// config id of the run, if one is known
, lastRunConfigId: Option[NodeConfigId]
) extends ErrorNoConfigData
/*
 * No report of interest for the expected configuration
 * (either none at all, or some but too old for our situation).
 */
final case class NoReportInInterval(
// the configuration for which reports were expected
expectedConfig: NodeExpectedReports
) extends NoReport
/*
 * No report of interest, but that is expected because
 * we are in the correct compliance mode for that
 * (reports are disabled).
 */
final case class ReportsDisabledInInterval(
// the current expected configuration of the node
expectedConfig: NodeExpectedReports
) extends NoReport
/*
 * We are waiting for the node to send reports for `expectedConfig`,
 * and the date before which they are expected is not reached yet.
 */
final case class Pending(
// the configuration for which we are waiting for reports
expectedConfig : NodeExpectedReports
// if there is a previous run with a known expected configuration,
// the date of that run and that configuration
, optLastRun : Option[(DateTime, NodeExpectedReports)]
// the date until which that pending status holds
, expirationDateTime : DateTime
) extends NoReport with ExpiringStatus
/*
 * The case where the run has a version (config id) which is known
 * by the server, but which is not the version that the node is
 * expected to report on (`expectedConfig`). This is an error.
 */
final case class UnexpectedVersion(
// date and time of the run
lastRunDateTime : DateTime
// the (known) expected configuration matching the version of the run
, lastRunConfigInfo : Some[NodeExpectedReports]
// expiration date of the configuration of the run
, lastRunExpiration : DateTime
// the configuration the node should be reporting on
, expectedConfig : NodeExpectedReports
// date before which reports for `expectedConfig` are expected
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable {
// `lastRunConfigInfo` is statically a `Some`, so `.get` can not fail here
val lastRunConfigId = lastRunConfigInfo.get.nodeConfigId
}
/**
 * A case where we have a run without version,
 * but we really should have one, because versions are
 * initialized in the server for that node.
 */
final case class UnexpectedNoVersion(
// date and time of the run
lastRunDateTime : DateTime
// the config id attributed to the run (the run itself did not provide one)
, lastRunConfigId : NodeConfigId
// expiration date of the config id attributed to the run
, lastRunExpiration : DateTime
// the configuration the node should be reporting on
, expectedConfig : NodeExpectedReports
// expiration date used for the expected configuration
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable {
// no expected reports are known for the run
val lastRunConfigInfo = None
}
/**
 * A case where we have a run with a version,
 * but we didn't find that version in the database,
 * whereas we really should, because versions are
 * initialized in the server for that node.
 */
final case class UnexpectedUnknowVersion(
// date and time of the run
lastRunDateTime : DateTime
// the config id sent by the run, unknown to the server
, lastRunConfigId : NodeConfigId
// the configuration the node should be reporting on
, expectedConfig : NodeExpectedReports
// date before which reports for `expectedConfig` are expected
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable {
// the version of the run is unknown, so there are no expected reports for it
val lastRunConfigInfo = None
}
/*
 * The nominal case: the last run is for the expected configuration
 * and is not expired, so compliance can be computed by comparing
 * the reports of the run with `expectedConfig`.
 */
final case class ComputeCompliance(
// date and time of the run
lastRunDateTime : DateTime
// the expected configuration, which is also the one of the run
, expectedConfig : NodeExpectedReports
// date until which that run is considered valid
, expirationDateTime : DateTime
) extends Ok with LastRunAvailable with ExpiringStatus {
// the run is for the expected configuration, so both values
// are directly derived from `expectedConfig`
val lastRunConfigId = expectedConfig.nodeConfigId
val lastRunConfigInfo = Some(expectedConfig)
}
/*
* An ADT to describe the behaviour regarding Unexpected reports.
* Unexpected reports are reports that the node sent but that were not
* awaited in the expected configuration reporting.
* There are mainly 4 causes for unexpected reports:
* - a bug in a Technique that sends a report when it should not. This is a really unexpected report, and
* should be reported to the Technique maintainer for correction.
* - a Technique is executed when it should not be. Typically, the node didn't fetch the updated version
* of its policies and it still reports on the previous one. Real unexpectation.
* - the network sneezed and syslog went mad. In such a case, we can miss reports or have some duplicated. Missing
* reports is embarrassing, but syslog tries very hard to not do that, so we more often have duplicated ones.
* - reports were on a part of the technique with a cfengine parameter which happened to be an iterator and
* so it was executed several times, and there are as many reports. This case is expected, but in the general case,
* we don't have anything that allows to anticipate the number of reports sent back (and specific cases like "the
* number is available in a server-side known variable" are hard, too, because it means that we know how to parse and
* interpret a lot more of the cfengine grammar).
*
* The last two cases bring a lot of false positive bad compliance, and we want to let the user be able to
* accept them. We want that to be optional, as in each case, it could be real unexpected reports (but it's very
* unlikely).
*/
// One option describing a kind of unexpected report that is accepted.
sealed trait UnexpectedReportBehavior
// The available options.
object UnexpectedReportBehavior {
// if two reports are exactly the same and one is unexpected, assume it was a duplication
final case object AllowsDuplicate extends UnexpectedReportBehavior
// if a report originally has a CFEngine var, allow to get several report values for it.
final case object UnboundVarValues extends UnexpectedReportBehavior
}
/*
 * The set of UnexpectedReportBehavior options that are enabled, with
 * some helpers to query it and to build updated copies of it.
 */
final case class UnexpectedReportInterpretation(options: Set[UnexpectedReportBehavior]) {
  // is `opt` one of the enabled options?
  def isSet(opt: UnexpectedReportBehavior) = options(opt)

  // are ALL the provided options enabled? (true when no option is provided)
  def allSet(opts: UnexpectedReportBehavior*) = opts.forall(o => options.contains(o))

  // is AT LEAST ONE of the provided options enabled? (false when no option is provided)
  def anySet(opts: UnexpectedReportBehavior*) = opts.exists(o => options.contains(o))

  // a copy of that interpretation where `opt` is enabled
  def set(opt: UnexpectedReportBehavior) = copy(options = options + opt)

  // a copy of that interpretation where `opt` is not enabled
  def unset(opt: UnexpectedReportBehavior) = copy(options = options - opt)
}
/**
* An execution batch contains the node reports for a given Rule / Directive at a given date
* An execution Batch is at a given time <- TODO : Is it relevant when we have several node ?
*/
object ExecutionBatch extends Loggable {
//these patterns must be reluctant matches to avoid strange things
//when two variables are present, or something like: ${foo}xxxxxx}.
//regex for a string containing at least one CFEngine variable: ${xxx} or $(xxx)
final val matchCFEngineVars = """.*\$(\{.+?\}|\(.+?\)).*""".r
//regex (as a string) for one CFEngine variable: ${xxx} or $(xxx)
final private val replaceCFEngineVars = """\$\{.+?\}|\$\(.+?\)"""
/**
 * Container to store common information about "what are we
 * talking about in that merge?"
 */
private[reports] final case class MergeInfo(
// the node the merge is about
nodeId: NodeId
// date and time of the run, if there is one
, run: Option[DateTime]
// config id used for the merge, if there is one
, configId: Option[NodeConfigId]
// date and time at which the result expires
, expirationTime: DateTime
)
/**
 * The time that we are going to give to the agent as a grace period to get
 * its reports.
 * That time is added to the normal agent execution period, so that:
 * - if the agent runs every 2 minutes, reports should have been received
 * after 7 minutes max
 * - if the agent runs every 240 minutes, reports should have been received
 * after 245 minutes max.
 *
 * That notion only makes sense for the compliance mode, as it is expected to
 * NOT receive reports in the changes-only mode.
 */
final val GRACE_TIME_PENDING = Duration.standardMinutes(5)
/**
 * The end of times, used to denote reports which are not expiring.
 * It is the instant built from `Long.MaxValue` milliseconds.
 */
final val END_OF_TIME = new DateTime(Long.MaxValue)
/**
 * Takes a string that should contain CFEngine variables ( $(xxx) or ${xxx} )
 * and builds the regexp pattern that matches any value that the string can
 * take once its variables are expanded:
 * - each $(xxx) (or ${xxx}) part is replaced by .*
 * - anything else is quoted, so that it is matched literally.
 *
 * For example, "${foo}(bar)$(baz)foo" => "\Q\E.*\Q(bar)\E.*\Qfoo\E"
 *
 * Literal parts are quoted with Pattern.quote, so a string which itself
 * contains the end-of-quote marker (backslash followed by E) is still matched
 * literally instead of breaking out of the quoting.
 * The pattern is compiled with Pattern.DOTALL, so that the value of a variable
 * is allowed to span several lines.
 *
 * @param x the string with CFEngine variables
 * @return the compiled pattern
 */
final def replaceCFEngineVars(x : String) : Pattern = {
  // the type ascription selects the String constant which has the same name as that method
  val variableRegex: String = replaceCFEngineVars
  // limit = -1 keeps trailing empty strings, so a variable at the very end of
  // `x` still gets its ".*" (and its empty quoted part after it)
  val literalParts = x.split(variableRegex, -1)
  val regex = literalParts.map(part => Pattern.quote(part)).mkString(".*")
  Pattern.compile(regex, Pattern.DOTALL)
}
/*
 * Groups the agent execution interval and the compliance mode.
 * NOTE(review): by its name, this is the context used to decide the status
 * of a node which does not answer - confirm with callers.
 */
final case class ContextForNoAnswer(
// NOTE(review): presumably in minutes - the unit is not visible here, confirm
agentExecutionInterval: Int
, complianceMode : ComplianceMode
)
/*
 * Log (at trace level, on the logger of the node) the kind of run
 * information which was chosen, then give it back unchanged.
 * Going through that method for all results ensures that the log
 * message is actually in sync with the info type.
 */
private[this] def runType(traceMessage: String, runType: RunAndConfigInfo)(implicit nodeId: NodeId): RunAndConfigInfo = {
  // only add the separator when there is something to say
  val msg = traceMessage.trim match {
    case "" => ""
    case _  => ": " + traceMessage
  }
  val nodeLogger = ComplianceDebugLogger.node(nodeId)
  nodeLogger.trace(s"Run config for node ${nodeId.value}: ${runType.logName} ${msg}")
  runType
}
/*
 * For each node, get the config it has.
 * This method bases its result on THE LAST RUN
 * of each node, and tries to discover the run linked information (datetime, config id).
 *
 * The result has one entry for each node of `runs`. The kind of RunAndConfigInfo
 * is chosen from:
 * - the presence of a last run for the node,
 * - the presence of a config id in that run, and of expected reports for it,
 * - the presence of a current expected configuration for the node,
 * - the comparison of the different expiration dates with the run date or with now.
 * Each choice is logged at trace level (see `runType`).
 */
def computeNodesRunInfo(
    // The set of runs associated with ALL requested nodes.
    //
    // If it exists, the last run received for these nodes is coupled with the
    // corresponding expected node configuration for that run, which will allow to know what
    // config option to apply.
    runs : Map[NodeId, Option[AgentRunWithNodeConfig]]
    // the current expected node configurations for all nodes.
    // This is useful for nodes without runs (ex in no-report mode), node with a run not for the
    // last config (show diff etc). It may be none for ex. when a node was added since last generation
  , currentNodeConfigs: Map[NodeId, Option[NodeExpectedReports]]
    // other config information to allow better reporting on error
  , nodeConfigIdInfos: Map[NodeId, Option[Seq[NodeConfigIdInfo]]]
): Map[NodeId, RunAndConfigInfo] = {

  /*
   * How long a run is valid AFTER AN UPDATE (i.e, not in permanent regime).
   * This is shorter than runValidityDuration, because a config update IS a change and
   * forces to send reports in all cases.
   */
  def updateValidityDuration(runIntervalInfo: ResolvedAgentRunInterval) = runIntervalInfo.interval.plus(GRACE_TIME_PENDING)

  /*
   * How long a run is valid before receiving any report (but not after an update)
   */
  def runValidityDuration(runIntervalInfo: ResolvedAgentRunInterval, complianceMode: ComplianceMode) = complianceMode.mode match {
    case ChangesOnly =>
      //expires after run*heartbeat period - we need another run before that.
      val heartbeat = Duration.standardMinutes((runIntervalInfo.interval.getStandardMinutes * runIntervalInfo.heartbeatPeriod ))
      heartbeat.plus(GRACE_TIME_PENDING)
    case FullCompliance | ReportsDisabled =>
      updateValidityDuration(runIntervalInfo)
  }

  // the same "now" is used for all nodes
  val now = DateTime.now

  runs.map { case (nodeId, optRun) =>
    // needed by runType, to log on the logger of that node
    implicit val _n = nodeId

    //the whole problem is to find the correct NodeRunInfo from the optRun and other information
    val nodeRunInfo = optRun match {
      //
      // [I] First case: we don't have (recent) runs for that node
      //     Perhaps it's a new node (with or without a generation), or are we in ReportsDisabled
      //     So we don't have matching expected reports either.
      //
      case None =>
        // Let's try to see what is currently expected from that node
        currentNodeConfigs.get(nodeId).flatten match {
          case None =>
            //let's see if the node has ANY config info
            nodeConfigIdInfos.getOrElse(nodeId, None) match {
              case None =>
                //ok, it's a node without any config (so without runs, of course). Perhaps a new node ?
                runType(s"nodeId has no configuration ID version, perhaps it's a new Node?", NoRunNoExpectedReport)

              case Some(_) =>
                //so, the node has existed at some point, but not now. Strange.
                runType("nodeId exists in DB but has no version (due to cleaning, migration, synchro, etc)", NoRunNoExpectedReport)
            }

          case Some(currentConfig) =>
            if(currentConfig.complianceMode.mode == ReportsDisabled) { // oh, so in fact it's normal to not have runs!
              runType(s"compliance mode is set to '${ReportsDisabled.name}', it's ok to not having reports", ReportsDisabledInInterval(currentConfig))
            } else { //let's further examine the situation
              val expireTime = currentConfig.beginDate.plus(updateValidityDuration(currentConfig.agentRun))

              if(expireTime.isBefore(now)) {
                runType("no run (ever or too old)", NoReportInInterval(currentConfig))
              } else {
                runType(s"no run (ever or too old), Pending until ${expireTime}"
                  , Pending(currentConfig, None, expireTime)
                )
              }
            }
        }

      //
      // [II] Second case: we DO have a run.
      //      We need to look if it has an associated expected configuration,
      //      and if not try to gather pieces of information from elsewhere.
      //      Then analyse the consistency of the result.
      //
      case Some(runInfos) =>
        (runInfos, currentNodeConfigs.get(nodeId).flatten) match {

          //
          // #1 : What the hell ?
          //      that's not good. Why a run without expected config ?
          //      Moreover, we group together the cases where we have a config for the run, because without
          //      a current expected config, it should be impossible (no way to pair it with that run).
          //      Just log an error for dev.
          case ((AgentRunWithNodeConfig(AgentRunId(_, t), _, _, _)), None) =>
            if(nodeConfigIdInfos.isDefinedAt(nodeId)) {
              runType("nodeId exists in DB but has no version (due to cleaning?). Need regeneration, no expected report yet.", NoExpectedReport(t, None))
            } else {
              runType("nodeId was not found in DB but is sending reports. It is likely a new node. Need regeneration, no expected report yet."
                , NoExpectedReport(t, None)
              )
            }

          //
          // #2 : run without config ID (neither in it nor found)
          //      no expected config for the run. Why so? At least, a recent config.
          case ((AgentRunWithNodeConfig(AgentRunId(_, t), None, _, _)), Some(currentConfig)) =>
            /*
             * Here, we want to check two things:
             * - should the run have contained a config id ?
             *   It should if the oldest config was created a long time ago,
             *   and if it is the case most likely the node can't get
             *   its updated promises.
             *   The logic is that only nodes with initial promises send reports without a config Id. So
             *   if a node is in that case, it is because it never got generated promises.
             *   If the first generated promises for that node are beyond the grace period, it means that
             *   the run should have used these promises, and we have a (DNS) problem because it didn't.
             *
             * - else, we look at the most recent
             *   config and decide between pending / no answer
             *
             * Note: we must use values from the current expected config for modes,
             * because we don't have anything else really.
             */

            // Use the known config id infos of the node, or only the current one when there
            // is none. An empty sequence is handled like a missing one, because `minBy`
            // throws an exception on an empty collection.
            val knownConfigInfos = nodeConfigIdInfos.get(nodeId).flatten.filter(_.nonEmpty).getOrElse(Seq(currentConfig.configInfo))
            val oldestConfigId   = knownConfigInfos.minBy( _.creation.getMillis)
            val oldestExpiration = oldestConfigId.creation.plus(updateValidityDuration(currentConfig.agentRun))

            if(oldestExpiration.isBefore(t) ) {
              //we had a config set a long time ago, then interval+grace time happened, and then
              //we got a run without any config id => the node didn't update its promises
              runType(s"node send reports without nodeConfigId but the oldest configId (${oldestConfigId.configId.value}) expired since ${oldestExpiration})"
                , UnexpectedNoVersion(t, oldestConfigId.configId, oldestExpiration, currentConfig, oldestExpiration)
              )
            } else {
              val expirationTime = currentConfig.beginDate.plus(updateValidityDuration(currentConfig.agentRun))
              if(expirationTime.isBefore(t)) {
                runType(s"node should have sent reports for configId ${currentConfig.nodeConfigId.value} before ${expirationTime} but got a report at ${t} without any configId"
                  , NoReportInInterval(currentConfig)
                )
              } else {
                runType(s"waiting for node to send reports for configId ${currentConfig.nodeConfigId.value} before ${expirationTime} (last run at ${t} didn't have any configId"
                  , Pending(currentConfig, None, expirationTime) //here, "None" even if we have an old run, because we don't have expectedConfig for it.
                )
              }
            }

          //
          // #3 : run with a version ID !
          //      But no corresponding expected configuration for that run
          //      (whereas there is a current one).
          case ((AgentRunWithNodeConfig(AgentRunId(_, t), Some((rv,None)), _, _)), Some(currentConfig)) =>
            //it's a bad version, but we have config id in DB => likely a corruption on node
            //expirationTime is the date after which we must have gotten a report for the current version
            val expirationTime = currentConfig.beginDate.plus(updateValidityDuration(currentConfig.agentRun))

            runType(s"nodeId exists in DB and has configId, expected configId is ${currentConfig.nodeConfigId.value}, but ${rv.value} was not found (node corruption?)",
              UnexpectedUnknowVersion(t, rv, currentConfig, expirationTime)
            )

          //
          // #4 : run with an ID ! And a matching expected config ! And a current expected config !
          //      So this is the standard case.
          //      We have to check if run version == expected, if it's the case: nominal case.
          //      Else, we need to check if the node version is not too old.
          case ((AgentRunWithNodeConfig(AgentRunId(_, t), Some((rv, Some(runConfig))), _, _)), Some(currentConfig)) =>
            runConfig.endDate match {

              // the config of the run has no end date
              case None =>
                val expirationTime = t.plus(runValidityDuration(currentConfig.agentRun, currentConfig.complianceMode))
                if(expirationTime.isBefore(now)) {
                  //take care of the potential case where currentConfig != runConfig in the log message
                  runType(s"Last run at ${t} is for the configId ${runConfig.nodeConfigId.value} but a new one should have been sent for configIf ${currentConfig.nodeConfigId.value} before ${expirationTime}"
                    , NoReportInInterval(currentConfig)
                  )
                } else { //nominal case
                  //here, we have to verify that the config ids are different, because we can
                  //be in the middle of a generation or have a badly closed node configuration on base
                  if(runConfig.nodeConfigId != currentConfig.nodeConfigId) {
                    //standard case: we changed version and are waiting for a run with the new one.
                    runType(s"last run at ${t} was for previous configId ${runConfig.nodeConfigId.value} and no report received for current configId ${currentConfig.nodeConfigId.value}, but ${now} is before expiration time ${expirationTime}, Pending"
                      , Pending(currentConfig, Some((t, runConfig)), expirationTime)
                    )
                  } else {
                    // the node is answering current config, on time
                    runType(s"Last run at ${t} is for the correct configId ${currentConfig.nodeConfigId.value} and not expired, compute compliance"
                      , ComputeCompliance(t, currentConfig, expirationTime)
                    )
                  }
                }

              // the config of the run has an end date
              case Some(eol) =>
                //check if the run is not too old for the version, i.e if endOfLife + grace is before run
                // a more recent version exists, so we are either awaiting reports
                // for it, or in some error state (completely unexpected version or "just" no report)
                val eolExpiration  = eol.plus(updateValidityDuration(runConfig.agentRun))
                val expirationTime = currentConfig.beginDate.plus(updateValidityDuration(currentConfig.agentRun))
                if(eolExpiration.isBefore(t)) {
                  //we should have had a more recent run
                  runType(s"node sent reports at ${t} for configId ${rv.value} (which expired at ${eol}) but should have been for configId ${currentConfig.nodeConfigId.value}"
                    , UnexpectedVersion(t, Some(runConfig), eolExpiration, currentConfig, expirationTime)
                  )
                } else {
                  if(expirationTime.isBefore(now)) {
                    runType(s"last run at ${t} was for expired configId ${rv.value} and no report received for current configId ${currentConfig.nodeConfigId.value} (one was expected before ${expirationTime})"
                      , NoReportInInterval(currentConfig)
                    )
                  } else {
                    //standard case: we changed version and are waiting for a run with the new one.
                    // NOTE(review): the test and the log message use expirationTime, but the Pending
                    // status is built with eolExpiration - confirm that it is the intended date.
                    runType(s"last run at ${t} was for expired configId ${rv.value} and no report received for current configId ${currentConfig.nodeConfigId.value}, but ${now} is before expiration time ${expirationTime}, Pending"
                      , Pending(currentConfig, Some((t, runConfig)), eolExpiration)
                    )
                  }
                }
            }
        }
    }

    // now that we finally have the runInfo, return it coupled with nodeId for the final result
    (nodeId, nodeRunInfo)
  }.toMap
}
/**
* This is the main entry point to get the detailed reporting
* It returns a Sequence of NodeStatusReport which gives, for
* each node, the status and all the directives associated.
*
* The contract is to give to that function a list of expected
* report for an unique given node
*
*/
def getNodeStatusReports(
nodeId : NodeId
// run info: if we have a run, we have a datetime for it
// and perhaps a configId
, runInfo : RunAndConfigInfo
// reports we get on the last know run
, agentExecutionReports : Seq[Reports]
, unexpectedInterpretation: UnexpectedReportInterpretation
) : NodeStatusReport = {
def buildUnexpectedVersion(runTime: DateTime, runVersion: Option[NodeConfigIdInfo], runExpiration: DateTime, expectedConfig: NodeExpectedReports, expectedExpiration: DateTime, nodeStatusReports: Seq[ResultReports]) = {
//mark all report of run unexpected,
//all expected missing
buildRuleNodeStatusReport(
MergeInfo(nodeId, Some(runTime), Some(expectedConfig.nodeConfigId), expectedExpiration)
, expectedConfig
, ReportType.Missing
) ++
buildUnexpectedReports(MergeInfo(nodeId, Some(runTime), runVersion.map(_.configId), runExpiration), nodeStatusReports)
}
//only interesting reports: for that node, with a status
val nodeStatusReports = agentExecutionReports.collect{ case r: ResultReports if(r.nodeId == nodeId) => r }
ComplianceDebugLogger.node(nodeId).trace(s"Computing compliance for node ${nodeId.value} with: [${runInfo.toLog}]")
val t1 = System.currentTimeMillis
val ruleNodeStatusReports = runInfo match {
case ReportsDisabledInInterval(expectedConfig) =>
ComplianceDebugLogger.node(nodeId).trace(s"Compliance mode is ${ReportsDisabled.name}, so we don't have to try to merge/compare with expected reports")
buildRuleNodeStatusReport(
//these reports don't really expires - without change, it will
//always be the same.
MergeInfo(nodeId, None, Some(expectedConfig.nodeConfigId), END_OF_TIME)
, expectedConfig
, ReportType.Disabled
)
case ComputeCompliance(lastRunDateTime, expectedConfig, expirationTime) =>
ComplianceDebugLogger.node(nodeId).trace(s"Using merge/compare strategy between last reports from run at ${lastRunDateTime} and expect reports ${expectedConfig.toLog}")
mergeCompareByRule(
MergeInfo(nodeId, Some(lastRunDateTime), Some(expectedConfig.nodeConfigId), expirationTime)
, nodeStatusReports
, expectedConfig
, expectedConfig
, unexpectedInterpretation
)
case Pending(expectedConfig, optLastRun, expirationTime) =>
optLastRun match {
case None =>
ComplianceDebugLogger.node(nodeId).trace(s"Node is Pending with no reports from a previous run, everything is pending")
// we don't have previous run, so we can simply say that all component in node are Pending
buildRuleNodeStatusReport(
MergeInfo(nodeId, None, Some(expectedConfig.nodeConfigId), expirationTime)
, expectedConfig
, ReportType.Pending
)
case Some((runTime, runConfig)) =>
/*
* In that case, we need to compute the status of all component in the previous run,
* then keep these result for component in the new expected config and for
* component in new expected config BUT NOT is the one for which we have the run,
* set pending.
*/
ComplianceDebugLogger.node(nodeId).trace("Node is Pending with reports from previous run, using merge/compare strategy between last "
+ s"reports from run ${runConfig.toLog} and expect reports ${expectedConfig.toLog}")
mergeCompareByRule(
MergeInfo(nodeId, Some(runTime), Some(expectedConfig.nodeConfigId), expirationTime)
, nodeStatusReports
, runConfig
, expectedConfig
, unexpectedInterpretation
)
}
case NoReportInInterval(expectedConfig) =>
ComplianceDebugLogger.node(nodeId).trace(s"Node didn't received reports recently, status depend of the compliance mode and previous report status")
buildRuleNodeStatusReport(
//these reports don't really expires - without change, it will
//always be the same.
MergeInfo(nodeId, None, Some(expectedConfig.nodeConfigId), END_OF_TIME)
, expectedConfig
, ReportType.NoAnswer
)
case UnexpectedVersion(runTime, Some(runConfig), runExpiration, expectedConfig, expectedExpiration) =>
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' with configId '${runConfig.nodeConfigId.value}' but that node should be sending reports for configId ${expectedConfig.nodeConfigId.value}")
buildUnexpectedVersion(runTime, Some(runConfig.configInfo), runExpiration, expectedConfig, expectedExpiration, nodeStatusReports)
case UnexpectedNoVersion(runTime, runId, runExpiration, expectedConfig, expectedExpiration) => //same as unextected, different log
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' without any configId but that node should be sending reports for configId ${expectedConfig.nodeConfigId.value}")
buildUnexpectedVersion(runTime, None, runExpiration, expectedConfig, expectedExpiration, nodeStatusReports)
case UnexpectedUnknowVersion(runTime, runId, expectedConfig, expectedExpiration) => //same as unextected, different log
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' configId '${runId.value}' which is not known by Rudder, and that node should be sending reports for configId ${expectedConfig.nodeConfigId.value}")
buildUnexpectedVersion(runTime, None, runTime, expectedConfig, expectedExpiration, nodeStatusReports)
case NoExpectedReport(runTime, optConfigId) =>
// these reports where not expected
ComplianceDebugLogger.node(nodeId).warn(s"Node '${nodeId.value}' sent reports for run at '${runInfo}' (with ${
optConfigId.map(x => s" configuration ID: '${x.value}'").getOrElse(" no configuration ID")
}). No expected configuration matches these reports.")
buildUnexpectedReports(MergeInfo(nodeId, Some(runTime), optConfigId, END_OF_TIME), nodeStatusReports)
case NoRunNoExpectedReport =>
/*
* Really, this node exists ? Shouldn't we just declare Ragnarök at that point ?
*/
ComplianceDebugLogger.node(nodeId).warn(s"Can not get compliance for node with ID '${nodeId.value}' because it has no configuration id initialised nor sent reports (node just added ?)")
Set[RuleNodeStatusReport]()
}
/*
* We must adapt the node run compliance info if we have
* an abort message or at least one mixed mode result
*/
val t2 = System.currentTimeMillis
TimingDebugLogger.trace(s"Compliance: getNodeStatusReports - computing compliance for node ${nodeId}: ${t2-t1}ms")
val status = {
val abort = agentExecutionReports.collect {
case r: LogReports if(r.nodeId == nodeId && r.component.toLowerCase == "abort run") =>
RunComplianceInfo.PolicyModeError.AgentAbortMessage(r.keyValue, r.message)
}.toSet
val mixed = ruleNodeStatusReports.collect { case r => r.directives.collect { case (_, d) if (d.compliance.badPolicyMode > 0) =>
RunComplianceInfo.PolicyModeError.TechniqueMixedMode(s"Error for node '${nodeId.value}' in directive '${d.directiveId.value}': either that directive is"+
" not sending the correct Policy Mode reports (for example Enforce reports in place of Audit one - does the directive's Technique is up-to-date?)"+
" or at least one other directive on that node based on the same Technique sends reports for a different Policy Mode")
}.toSet }.flatten
(abort ++ mixed).toList match {
case Nil => RunComplianceInfo.OK
case list => RunComplianceInfo.PolicyModeInconsistency(list)
}
}
val t3 = System.currentTimeMillis
TimingDebugLogger.trace(s"Compliance: computing policy status for ${nodeId}: ${t3-t2}ms")
val overrides = runInfo match {
case x: ExpectedConfigAvailable => x.expectedConfig.overrides
case x: LastRunAvailable => x.lastRunConfigInfo.map( _.overrides ).getOrElse(Nil).toList
case _ => Nil
}
NodeStatusReport.applyByNode(nodeId, runInfo, status, overrides, ruleNodeStatusReports)
}
// utility method to find how a missing report should be reported, given the
// compliance mode of the node and the policy mode that applies.
private[reports] def missingReportType(complianceMode: ComplianceMode, policyMode: PolicyMode) = {
  // Both modes are inspected in a single match: the policy mode only makes a
  // difference for "changes only", where an absent report is read as a success
  // of the flavour (enforce / audit) matching the policy mode.
  (complianceMode.mode, policyMode) match {
    case (FullCompliance , _                 ) => ReportType.Missing
    case (ReportsDisabled, _                 ) => ReportType.Disabled
    case (ChangesOnly    , PolicyMode.Enforce) => ReportType.EnforceSuccess
    case (ChangesOnly    , PolicyMode.Audit  ) => ReportType.AuditCompliant
  }
}
/**
* That method only take care of the low level logic of comparing
* expected reports with actual reports rule by rule. So we expect
* that something before took care of all the macro-,node-related-states
* (pending, non compatible version, etc).
*
* In that method, if we don't have a report for an expected component, it
* only can be a missing report (since we have all the reports of the run
* for the node), and if we have reports for other components than the one
* expected, they are unexpected.
*
* We have a two level comparison to do:
* - the expected reports corresponding to the run
* => we actually do the comparison component by component
* - the expected reports not in the previous set
* => they get the missingReport status (pending, success, missing
* depending on the time and compliance mode)
*
* The "diff" is done at the rule level, what means that, for example,
* a pending state will be applied to the whole rule, even if only
* one component changed on it (for ex, we just added an user).
*
* That choice can be made more precise later but for now it is linked
* to the "serial" of rule.
*/
private[reports] def mergeCompareByRule(
// its nodeId, run, configId and expirationTime are copied as-is into every
// RuleNodeStatusReport built by this method
mergeInfo : MergeInfo
// only report for that nodeid, of type ResultReports,
// for the correct run, for the correct version
, executionReports : Seq[ResultReports]
// expected reports the execution reports are compared against, component by component
, lastRunNodeConfig : NodeExpectedReports
// expected reports driving the content of the result: its rules are first built as
// Pending, then overridden by the run comparison where one exists
, currentConfig : NodeExpectedReports
// forwarded untouched to checkExpectedComponentWithReports
, unexpectedInterpretation: UnexpectedReportInterpretation
): Set[RuleNodeStatusReport] = {
// u1..u4 accumulate, over all rules, the nanoseconds spent in each of the four steps of
// the per-rule computation below; they are only read by the timing traces
var u1, u2, u3, u4 = 0L
val t0 = System.currentTimeMillis
val reportsPerRule = executionReports.groupBy(_.ruleId)
// first pass: one RuleNodeStatusReport per rule of the run configuration, indexed by rule id
val complianceForRun: Map[RuleId, RuleNodeStatusReport] = (for {
RuleExpectedReports(ruleId
, directives ) <- lastRunNodeConfig.ruleExpectedReports
directiveStatusReports = {
val t1 = System.nanoTime
// here, we had at least one report, even if it is not a ResultReports (i.e: run start/end is meaningful)
// a rule without any result report gets an empty sequence, so all its components end up in `missing`
val reportsForThatNodeRule: Seq[ResultReports] = reportsPerRule.getOrElse(ruleId, Seq[ResultReports]())
// reports of that rule, indexed by (directive id, component name)
val reports = reportsForThatNodeRule.groupBy(x => (x.directiveId, x.component) )
// expected components of that rule, indexed with the same kind of key as `reports`.
// NOTE(review): because of toMap, two expected components sharing the same
// (directive id, component name) collapse into a single entry - confirm this cannot happen
val expectedComponents = (for {
directive <- directives
policyMode = PolicyMode.directivePolicyMode(
lastRunNodeConfig.modes.globalPolicyMode
, lastRunNodeConfig.modes.nodePolicyMode
, directive.policyMode
, directive.isSystem
)
// the status to use for ACTUALLY missing reports, i.e for reports for which
// we have a run but are not here. Basically, it's "missing" when on
// full compliance and "success" when on changes only - but that success
// depends upon the policy mode
missingReportStatus = missingReportType(lastRunNodeConfig.complianceMode, policyMode)
component <- directive.components
} yield {
((directive.directiveId, component.componentName), (policyMode, missingReportStatus, component))
}).toMap
val t2 = System.nanoTime
u1 += t2-t1
/*
* now we have three cases:
* - expected component without reports => missing (modulo changes only interpretation)
* - reports without expected component => unknown
* - both expected component and reports => check
*/
val reportKeys = reports.keySet
val expectedKeys = expectedComponents.keySet
val okKeys = reportKeys.intersect(expectedKeys)
// If okKeys.size == reportKeys.size, there is no unexpected reports
// If okKeys.size == expectedKeys.size, there is no missing reports
// expected components without any report: every grouped value of the component gets
// the `missingReportStatus` computed above, without message
val missing = (if (okKeys.size != expectedKeys.size) {
expectedComponents.view.filterKeys(k => !reportKeys.contains(k)).toMap.map { case ((d,_), (pm,mrs,c)) =>
DirectiveStatusReport(d, Map(c.componentName ->
/*
* Here, we group by unexpanded component value, not expanded one. We want in the end:
* -- edit file ## component name
* --- /tmp/${rudder.node.ip} ## component value
* ----- /tmp/ip1 ## report 1
* ----- /tmp/ip2 ## report 2
* Not:
* -- edit file
* --- /tmp/ip1
* ----- /tmp/ip1
* --- /tmp/ip2
* ----- /tmp/ip2
*/
ComponentStatusReport(c.componentName, c.groupedComponentValues.map { case(v,u) => (u ->
ComponentValueStatusReport(v, u, MessageStatusReport(mrs, None) :: Nil)
)}.toMap)
))
}
} else {
Nil
})
val t3 = System.nanoTime
u2 += t3-t2
//unexpected contains the one with unexpected key and all non matching serial/version
// i.e. every report whose (directive id, component name) is not an expected key
val unexpected = (if (okKeys.size != reportKeys.size) {
buildUnexpectedDirectives(
reports.view.filterKeys(k => !expectedKeys.contains(k)).values.flatten.toSeq
)
} else {
Seq[DirectiveStatusReport]()
})
val t4 = System.nanoTime
u3 += t4-t3
// keys both expected and reported: the detailed check is delegated to
// checkExpectedComponentWithReports (both lookups are safe, okKeys is the intersection of the key sets)
val expected = okKeys.map { k =>
val (policyMode, missingReportStatus, components) = expectedComponents(k)
DirectiveStatusReport(k._1, Map(k._2 ->
checkExpectedComponentWithReports(components, reports(k), missingReportStatus, policyMode, unexpectedInterpretation)
))
}
val t5 = System.nanoTime
u4 += t5-t4
missing ++ unexpected ++ expected
}
} yield {
(
ruleId
, RuleNodeStatusReport(
mergeInfo.nodeId
, ruleId
, mergeInfo.run
, mergeInfo.configId
, DirectiveStatusReport.merge(directiveStatusReports)
, mergeInfo.expirationTime
)
)
}).toMap
val t10 = System.currentTimeMillis
// u1..u4 are nanoseconds, hence the division by 1000 to log microseconds
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - prepare data: ${u1/1000}µs")
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - get missing reports: ${u2/1000}µs")
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - unexpected directives computation: ${u3/1000}µs")
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - expected directives computation: ${u4/1000}µs")
TimingDebugLogger.trace(s"Compliance: Compute complianceForRun map: ${t10-t0}ms")
//now, for all current expected reports, choose between the computed value and the default one
// note: isn't there something specific to do for unexpected reports ? Keep them all ?
// default value: every rule of the current configuration, built with the Pending status
val currentRunReports = buildRuleNodeStatusReport(mergeInfo, currentConfig, ReportType.Pending)
val t11 = System.currentTimeMillis
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - compute buildRuleNodeStatusReport: ${t11-t10}ms")
// second pass: split the rules of the current configuration between `computed` (the rule is
// known by the run, its compliance comes from complianceForRun) and `newStatus` (the rule is
// unknown by the run, it stays Pending). complianceForRun is only ever looked up with rule ids
// of the current configuration, so rules that only exist in the run configuration are dropped.
val (computed, newStatus) = currentRunReports.foldLeft((List[RuleNodeStatusReport](), List[RuleNodeStatusReport]())) { case ( (c,n), currentStatusReports) =>
complianceForRun.get(currentStatusReports.ruleId) match {
case None => //the whole rule is new!
//here, the reports are ACTUALLY pending, not missing.
(c, currentStatusReports :: n )
case Some(runStatusReport) => //look for added / removed directive
val runDirectives = runStatusReport.directives
val currentDirectives = currentStatusReports.directives
//don't keep directive that were removed between the two configs
val toKeep = runDirectives.view.filterKeys(k => currentDirectives.keySet.contains(k)).toMap
//now override currentDirective with the one to keep in currentReport
// (on a common key the right operand of ++ wins: directives known by the run take the
// computed status, directives only in the current configuration stay Pending)
val updatedDirectives = currentDirectives ++ toKeep
val newCompliance = runStatusReport.copy(directives = updatedDirectives)
(newCompliance :: c, n)
}
}
val t12 = System.currentTimeMillis
TimingDebugLogger.trace(s"Compliance: mergeCompareByRule - compute compliance : ${t12-t11}ms")
// the log message is only built when trace is enabled for that node
if (ComplianceDebugLogger.node(mergeInfo.nodeId).isTraceEnabled) {
ComplianceDebugLogger.node(mergeInfo.nodeId).trace(s"Compute compliance for node ${mergeInfo.nodeId.value} using: rules for which compliance is based on run reports: ${
computed.map { x => s"[${x.ruleId.value}]" }.mkString("")
};" + s" rule updated since run: ${
newStatus.map { x => s"${x.ruleId.value}" }.mkString("[", "][", "]")
}")
}
val t13 = System.currentTimeMillis
TimingDebugLogger.debug(s"Compliance: mergeCompareByRule global cost : ${t13-t0}ms")
// both lists were built by prepending; their order is irrelevant once turned into a Set
(computed ::: newStatus).toSet
}
private[this] def buildUnexpectedReports(mergeInfo: MergeInfo, reports: Seq[Reports]): Set[RuleNodeStatusReport] = {
  // One status report per rule id found in the given reports; every report of
  // the rule is turned into an "unexpected" directive status before merging.
  val reportsByRule = reports.groupBy(_.ruleId)
  val statusReports = for {
    (ruleId, ruleReports) <- reportsByRule
  } yield {
    val unexpectedDirectives = buildUnexpectedDirectives(ruleReports)
    RuleNodeStatusReport(
        mergeInfo.nodeId
      , ruleId
      , mergeInfo.run
      , mergeInfo.configId
      , DirectiveStatusReport.merge(unexpectedDirectives)
      , mergeInfo.expirationTime
    )
  }
  statusReports.toSet
}
/**
* Build unexpected reports for the given reports
*/
private[this] def buildUnexpectedDirectives(reports: Seq[Reports]): Seq[DirectiveStatusReport] = {
  // Each report yields its own single-component, single-value directive status,
  // flagged Unexpected and carrying the message of the report. The key value of
  // the report is used both as component value and as unexpanded component value.
  for (report <- reports) yield {
    val component       = report.component
    val keyValue        = report.keyValue
    val message         = MessageStatusReport(ReportType.Unexpected, report.message)
    val valueStatus     = ComponentValueStatusReport(keyValue, keyValue, List(message))
    val componentStatus = ComponentStatusReport(component, Map(keyValue -> valueStatus))
    DirectiveStatusReport(report.directiveId, Map(component -> componentStatus))
  }
}
private[reports] def buildRuleNodeStatusReport(
mergeInfo : MergeInfo
, expectedReports: NodeExpectedReports
, status : ReportType
, message : String = ""
): Set[RuleNodeStatusReport] = {
expectedReports.ruleExpectedReports.map { case RuleExpectedReports(ruleId, directives) =>
val d = directives.map { d =>
DirectiveStatusReport(d.directiveId,
d.components.map { c =>