Skip to content

ATM: Extract training data #11263

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ abstract class EndpointCharacteristic extends string {
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
);

/**
 * Gets the high-confidence threshold, which is 0.8. Indicators with confidence at or above this threshold are
 * considered to be high-confidence indicators.
 */
final float getHighConfidenceThreshold() { result = 0.8 }

// The following are some confidence values that are used in practice by the subclasses. They are defined as named
// constants here to make it easier to change them in the future.
/** Gets the named confidence constant `maximalConfidence`, whose value is 1.0. */
final float maximalConfidence() { result = 1.0 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import extraction.ExtractEndpointData
import extraction.ExtractEndpointDataTraining

string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
query instanceof NosqlInjectionQuery and
Expand All @@ -33,7 +33,7 @@ string getDescriptionForAlertCandidate(
) {
result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
or
getAtmCfg(query).isKnownSink(sinkCandidate) and
getDataFlowCfg(query).(AtmConfig).isKnownSink(sinkCandidate) and
result = "excluded[reason=known-sink]"
or
not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,8 @@
* Extracts training data we can use to train ML models for ML-powered queries.
*/

import javascript
import ExtractEndpointData as ExtractEndpointData
private import ExtractEndpointDataTraining as ExtractEndpointDataTraining

/**
 * Holds if `ExtractEndpointData::endpoints` holds for `endpoint`, `queryName`, `key`, `value` and `valueType`, and
 * `endpoint` additionally passes three filters for the same `queryName`: its `sinkLabel` is `Sink` or `NotASink`,
 * its `isExcludedFromEndToEndEvaluation` is `false`, and its `isConstantExpression` is `false`.
 */
query predicate endpoints(
DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
ExtractEndpointData::endpoints(endpoint, queryName, key, value, valueType) and
// only select endpoints that are either Sink or NotASink
ExtractEndpointData::endpoints(endpoint, queryName, "sinkLabel", ["Sink", "NotASink"], "string") and
// do not select endpoints filtered out by end-to-end evaluation
ExtractEndpointData::endpoints(endpoint, queryName, "isExcludedFromEndToEndEvaluation", "false",
"boolean") and
// only select endpoints that can be part of a tainted flow
ExtractEndpointData::endpoints(endpoint, queryName, "isConstantExpression", "false", "boolean")
}
// Alias: exposes the 5-argument predicate `ExtractEndpointDataTraining::reformattedTrainingEndpoints` as the
// `endpoints` query predicate.
query predicate endpoints = ExtractEndpointDataTraining::reformattedTrainingEndpoints/5;

/**
 * Holds if `endpoint` occurs in some tuple of the `endpoints` predicate and `ExtractEndpointData::tokenFeatures`
 * holds for `endpoint`, `featureName` and `featureValue`.
 */
query predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
// restrict to endpoints selected by `endpoints`, ignoring the remaining columns
endpoints(endpoint, _, _, _, _) and
ExtractEndpointData::tokenFeatures(endpoint, featureName, featureValue)
}
// Alias: exposes the 3-argument predicate `ExtractEndpointDataTraining::tokenFeatures` as the `tokenFeatures`
// query predicate.
query predicate tokenFeatures = ExtractEndpointDataTraining::tokenFeatures/3;
Loading