From e16470a2ae5c8ad26b5f38a5a4618b9b7bb72481 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Wed, 16 Mar 2022 21:49:52 +0100 Subject: [PATCH 1/2] undo unsound performance optimizations For an ordinary path-problem query, it is a requirement that at least one sink exists, otherwise there is nothing to alert on. Thus the optimization with checking `isSink(_, this, _)` pruning in `Configuration::hasFlow` is sound. For programs without any relevant sinks (which is likely to be the case when ATM is used), the `Configuration::hasFlow` calls will not hold due to the above pruning step. The optimizations removed in this commit are thus unsound for programs without any relevant sinks. --- .../adaptivethreatmodeling/AdaptiveThreatModeling.qll | 1 - .../adaptivethreatmodeling/EndpointScoring.qll | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll index 002a5c8fe8ea..12c282d6d98d 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll @@ -34,7 +34,6 @@ module ATM { */ pragma[inline] float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) { - any(DataFlow::Configuration cfg).hasFlow(source, sink) and shouldResultBeIncluded(source, sink) and result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink)) } diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll index a0d7fd528d3e..e70544a746c6 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll @@ -23,9 +23,9 @@ module ModelScoring { RelevantFeaturizationConfig() { this = "RelevantFeaturization" } override DataFlow::Node getAnEndpointToFeaturize() { - getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _) + getCfg().isEffectiveSource(result) or - getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result) + getCfg().isEffectiveSink(result) } } @@ -146,7 +146,6 @@ module Debugging { query predicate endpointScores = ModelScoring::endpointScores/3; query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) { - any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and - any(DataFlow::Configuration cfg).hasFlow(source, sink) + any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) } } From e94e1bb65bf5a4aca87426deb5b81e4f3257413e Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 17 Mar 2022 10:55:55 +0100 Subject: [PATCH 2/2] re-optimize without being unsound --- .../AdaptiveThreatModeling.qll | 23 +++++++++++++++++++ .../EndpointScoring.qll | 8 +++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll index 12c282d6d98d..e6d09981a752 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll @@ -34,6 +34,7 @@ module ATM { */ pragma[inline] float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) { + Optimizations::hasFlow(source, sink) and shouldResultBeIncluded(source, sink) and result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink)) } @@ -120,4 +121,26 @@ module ATM { ) } } + + /** + * Predicates for performance improvements that should not affect the semantics. + */ + module Optimizations { + /** + * EXPERIMENTAL. This API may change in the future. + * + * Holds if data may flow from `source` to `sink` for *some* configuration. + * + * This is a variant of `Configuration::hasFlow`, that does not require the precense of a source and a sink. + */ + predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + exists(DataFlow::Configuration cfg | + exists(DataFlow::SourcePathNode flowsource, DataFlow::SinkPathNode flowsink | + cfg.hasFlowPath(flowsource, flowsink) and + source = flowsource.getNode() and + sink = flowsink.getNode() + ) + ) + } + } } diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll index e70544a746c6..f3906e7dfb84 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll @@ -9,6 +9,7 @@ private import BaseScoring private import EndpointFeatures as EndpointFeatures private import FeaturizationConfig private import EndpointTypes +private import AdaptiveThreatModeling private string getACompatibleModelChecksum() { availableMlModels(result, "javascript", _, "atm-endpoint-scoring") @@ -23,9 +24,11 @@ module ModelScoring { RelevantFeaturizationConfig() { this = "RelevantFeaturization" } override DataFlow::Node getAnEndpointToFeaturize() { - getCfg().isEffectiveSource(result) + getCfg().isEffectiveSource(result) and + ATM::Optimizations::hasFlow(result, _) or - getCfg().isEffectiveSink(result) + getCfg().isEffectiveSink(result) and + ATM::Optimizations::hasFlow(_, result) } } @@ -146,6 +149,7 @@ module Debugging { query predicate endpointScores = ModelScoring::endpointScores/3; query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) { + ATM::Optimizations::hasFlow(source, sink) and any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) } }