From e16470a2ae5c8ad26b5f38a5a4618b9b7bb72481 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esbena@github.com>
Date: Wed, 16 Mar 2022 21:49:52 +0100
Subject: [PATCH 1/2] undo unsound performance optimizations

For an ordinary path-problem query, it is a requirement that at least one sink exists, otherwise there is nothing to alert on.
Thus the optimization with checking `isSink(_, this, _)` pruning in `Configuration::hasFlow` is sound.

For programs without any relevant sinks (which is likely to be the case when ATM is used), the `Configuration::hasFlow` calls will not hold due to the above pruning step.
The optimizations removed in this commit are thus unsound for programs without any relevant sinks.
---
 .../adaptivethreatmodeling/AdaptiveThreatModeling.qll      | 1 -
 .../adaptivethreatmodeling/EndpointScoring.qll             | 7 +++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
index 002a5c8fe8ea..12c282d6d98d 100644
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
@@ -34,7 +34,6 @@ module ATM {
      */
     pragma[inline]
     float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
-      any(DataFlow::Configuration cfg).hasFlow(source, sink) and
       shouldResultBeIncluded(source, sink) and
       result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink))
     }
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
index a0d7fd528d3e..e70544a746c6 100644
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
@@ -23,9 +23,9 @@ module ModelScoring {
     RelevantFeaturizationConfig() { this = "RelevantFeaturization" }
 
     override DataFlow::Node getAnEndpointToFeaturize() {
-      getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _)
+      getCfg().isEffectiveSource(result)
       or
-      getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result)
+      getCfg().isEffectiveSink(result)
     }
   }
 
@@ -146,7 +146,6 @@ module Debugging {
   query predicate endpointScores = ModelScoring::endpointScores/3;
 
   query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
-    any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink) and
-    any(DataFlow::Configuration cfg).hasFlow(source, sink)
+    any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink)
   }
 }

From e94e1bb65bf5a4aca87426deb5b81e4f3257413e Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esbena@github.com>
Date: Thu, 17 Mar 2022 10:55:55 +0100
Subject: [PATCH 2/2] re-optimize without being unsound

---
 .../AdaptiveThreatModeling.qll                | 23 +++++++++++++++++++
 .../EndpointScoring.qll                       |  8 +++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
index 12c282d6d98d..e6d09981a752 100644
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/AdaptiveThreatModeling.qll
@@ -34,6 +34,7 @@ module ATM {
      */
     pragma[inline]
     float getScoreForFlow(DataFlow::Node source, DataFlow::Node sink) {
+      Optimizations::hasFlow(source, sink) and
       shouldResultBeIncluded(source, sink) and
       result = unique(float s | s = any(ScoringResults results).getScoreForFlow(source, sink))
     }
@@ -120,4 +121,26 @@ module ATM {
       )
     }
   }
+
+  /**
+   * Predicates for performance improvements that should not affect the semantics.
+   */
+  module Optimizations {
+    /**
+     * EXPERIMENTAL. This API may change in the future.
+     *
+     * Holds if data may flow from `source` to `sink` for *some* configuration.
+     *
+     * This is a variant of `Configuration::hasFlow`, that does not require the precense of a source and a sink.
+     */
+    predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+      exists(DataFlow::Configuration cfg |
+        exists(DataFlow::SourcePathNode flowsource, DataFlow::SinkPathNode flowsink |
+          cfg.hasFlowPath(flowsource, flowsink) and
+          source = flowsource.getNode() and
+          sink = flowsink.getNode()
+        )
+      )
+    }
+  }
 }
diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
index e70544a746c6..f3906e7dfb84 100644
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll
@@ -9,6 +9,7 @@ private import BaseScoring
 private import EndpointFeatures as EndpointFeatures
 private import FeaturizationConfig
 private import EndpointTypes
+private import AdaptiveThreatModeling
 
 private string getACompatibleModelChecksum() {
   availableMlModels(result, "javascript", _, "atm-endpoint-scoring")
@@ -23,9 +24,11 @@ module ModelScoring {
     RelevantFeaturizationConfig() { this = "RelevantFeaturization" }
 
     override DataFlow::Node getAnEndpointToFeaturize() {
-      getCfg().isEffectiveSource(result)
+      getCfg().isEffectiveSource(result) and
+      ATM::Optimizations::hasFlow(result, _)
       or
-      getCfg().isEffectiveSink(result)
+      getCfg().isEffectiveSink(result) and
+      ATM::Optimizations::hasFlow(_, result)
     }
   }
 
@@ -146,6 +149,7 @@ module Debugging {
   query predicate endpointScores = ModelScoring::endpointScores/3;
 
   query predicate shouldResultBeIncluded(DataFlow::Node source, DataFlow::Node sink) {
+    ATM::Optimizations::hasFlow(source, sink) and
     any(ScoringResults scoringResults).shouldResultBeIncluded(source, sink)
   }
 }