Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* @name Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502
* security
* serialization
*/

import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph

/**
 * A taint-tracking configuration whose sources are remote flow sources and
 * whose sinks are the inputs of decoders that may execute their input.
 */
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
  UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }

  /** Holds if `source` is a `RemoteFlowSource`. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is an input of some `Decoding` for which `mayExecuteInput()` holds. */
  override predicate isSink(DataFlow::Node sink) {
    sink = any(Decoding decoder | decoder.mayExecuteInput()).getAnInput()
  }
}

// Report each path from a remote flow source to the input of a code-executing decoder.
from DataFlow::PathNode src, DataFlow::PathNode dest, UnsafeDeserializationConfiguration cfg
where cfg.hasFlowPath(src, dest)
select dest.getNode(), src, dest, "Deserializing of $@.", src.getNode(), "untrusted input"
57 changes: 57 additions & 0 deletions python/ql/src/experimental/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,63 @@ module SystemCommandExecution {
}
}

/**
 * A data-flow node that decodes data from a binary or textual format. This
 * is intended to include deserialization, unmarshalling, decoding, unpickling,
 * decompressing, decrypting, parsing etc.
 *
 * Doing so should normally preserve taint, but it can also be a problem
 * in itself, e.g. if it allows code execution or could result in denial-of-service.
 *
 * Every member predicate of this class simply forwards to the underlying
 * `Decoding::Range` node.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `Decoding::Range` instead.
 */
class Decoding extends DataFlow::Node {
  // The `Decoding::Range` node that this node is equal to (see the characteristic predicate).
  Decoding::Range range;

  // A node is a `Decoding` exactly when it is some `Decoding::Range`.
  Decoding() { this = range }

  /** Holds if this decoding may execute code embedded in its input. */
  predicate mayExecuteInput() { range.mayExecuteInput() }

  /** Gets an input that is decoded by this decoding. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }

  /** Gets the output that contains the decoded data produced by this decoding. */
  DataFlow::Node getOutput() { result = range.getOutput() }

  /** Gets an identifier for the format this decoding decodes from, such as "JSON". */
  string getFormat() { result = range.getFormat() }
}

/** Provides a class for modeling new decoding mechanisms. */
module Decoding {
  /**
   * A data-flow node that decodes data from a binary or textual format. This
   * is intended to include deserialization, unmarshalling, decoding, unpickling,
   * decompressing, decrypting, parsing etc.
   *
   * Doing so should normally preserve taint, but it can also be a problem
   * in itself, e.g. if it allows code execution or could result in denial-of-service.
   *
   * All four member predicates are abstract, so each concrete model must
   * provide an implementation of every one of them.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `Decoding` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Holds if this decoding may execute code embedded in its input. */
    abstract predicate mayExecuteInput();

    /** Gets an input that is decoded by this decoding. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the decoded data produced by this decoding. */
    abstract DataFlow::Node getOutput();

    /** Gets an identifier for the format this decoding decodes from, such as "JSON". */
    abstract string getFormat();
  }
}

/**
* A data-flow node that dynamically executes Python code.
*
Expand Down
2 changes: 2 additions & 0 deletions python/ql/src/experimental/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
* Helper file that imports all framework modeling.
*/

private import experimental.semmle.python.frameworks.Dill
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Invoke
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Yaml
58 changes: 58 additions & 0 deletions python/ql/src/experimental/semmle/python/frameworks/Dill.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/**
* Provides classes modeling security-relevant aspects of the 'dill' package.
* See https://pypi.org/project/dill/.
*/

private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts

private module Dill {
  /**
   * Gets a reference to the `dill` module, where `t` is the type tracker
   * state with which the reference was reached.
   */
  private DataFlow::Node dill(DataFlow::TypeTracker t) {
    // Base case: the import node for `dill`.
    result = DataFlow::importNode("dill") and
    t.start()
    or
    // Step case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | dill(prev).track(prev, t) = result)
  }

  /** Gets a reference to the `dill` module (type tracking has reached its end state). */
  DataFlow::Node dill() { dill(DataFlow::TypeTracker::end()) = result }

  /** Provides models for the `dill` module. */
  module dill {
    /**
     * Gets a reference to the `dill.loads` function, where `t` is the type
     * tracker state with which the reference was reached.
     */
    private DataFlow::Node loads(DataFlow::TypeTracker t) {
      // Base case: the import node for `dill.loads`.
      result = DataFlow::importNode("dill.loads") and
      t.start()
      or
      // Base case: tracking starts in the `loads` attribute of a `dill` module reference.
      result = dill() and
      t.startInAttr("loads")
      or
      // Step case: extend an already tracked reference by one tracking step.
      exists(DataFlow::TypeTracker prev | loads(prev).track(prev, t) = result)
    }

    /** Gets a reference to the `dill.loads` function (type tracking has reached its end state). */
    DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
  }
}

/**
 * A call to `dill.loads`.
 * See https://pypi.org/project/dill/ (which currently refers you
 * to https://docs.python.org/3/library/pickle.html#pickle.loads).
 *
 * The serialized data may be passed either as the first positional argument
 * or through the `str` keyword argument; both are reported as inputs.
 */
private class DillLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  DillLoadsCall() { node.getFunction() = Dill::dill::loads().asCfgNode() }

  /** Always holds: every `dill.loads` call is treated as able to execute its input. */
  override predicate mayExecuteInput() { any() }

  /** Gets the argument holding the serialized data (positional index 0 or keyword `str`). */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() = node.getArg(0)
    or
    // `dill.loads(str=payload)`: the first parameter of `dill.loads` is named `str`.
    result.asCfgNode() = node.getArgByName("str")
  }

  /** Gets the call node itself, which holds the deserialized object. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the format identifier `"dill"`. */
  override string getFormat() { result = "dill" }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
*/
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
private DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask")
or
Expand All @@ -31,7 +31,7 @@ private module Flask {
/** Provides models for the `flask` module. */
module flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request(DataFlow::TypeTracker t) {
private DataFlow::Node request(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.request")
or
Expand Down
100 changes: 100 additions & 0 deletions python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,106 @@ private module Stdlib {
}
}

// ---------------------------------------------------------------------------
// marshal
// ---------------------------------------------------------------------------
/**
 * Gets a reference to the `marshal` module, where `t` is the type tracker
 * state with which the reference was reached.
 */
private DataFlow::Node marshal(DataFlow::TypeTracker t) {
  // Base case: the import node for `marshal`.
  result = DataFlow::importNode("marshal") and
  t.start()
  or
  // Step case: extend an already tracked reference by one tracking step.
  exists(DataFlow::TypeTracker prev | marshal(prev).track(prev, t) = result)
}

/** Gets a reference to the `marshal` module (type tracking has reached its end state). */
DataFlow::Node marshal() { marshal(DataFlow::TypeTracker::end()) = result }

/** Provides models for the `marshal` module. */
module marshal {
  /**
   * Gets a reference to the `marshal.loads` function, where `t` is the type
   * tracker state with which the reference was reached.
   */
  private DataFlow::Node loads(DataFlow::TypeTracker t) {
    // Base case: the import node for `marshal.loads`.
    result = DataFlow::importNode("marshal.loads") and
    t.start()
    or
    // Base case: tracking starts in the `loads` attribute of a `marshal` module reference.
    result = marshal() and
    t.startInAttr("loads")
    or
    // Step case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | loads(prev).track(prev, t) = result)
  }

  /** Gets a reference to the `marshal.loads` function (type tracking has reached its end state). */
  DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
}

/**
 * A call to `marshal.loads`, modeled as a decoding that may execute its input.
 * See https://docs.python.org/3/library/marshal.html#marshal.loads
 */
private class MarshalLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  MarshalLoadsCall() { marshal::loads().asCfgNode() = node.getFunction() }

  /** Always holds: every `marshal.loads` call is treated as able to execute its input. */
  override predicate mayExecuteInput() { any() }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call node itself. */
  override DataFlow::Node getOutput() { this = result }

  /** Gets the format identifier `"marshal"`. */
  override string getFormat() { "marshal" = result }
}

// ---------------------------------------------------------------------------
// pickle
// ---------------------------------------------------------------------------
/** Gets one of the module names treated as the `pickle` module: `pickle`, `cPickle` or `_pickle`. */
private string pickleModuleName() {
  result = "pickle"
  or
  result = "cPickle"
  or
  result = "_pickle"
}

/**
 * Gets a reference to the `pickle` module (under any of the names given by
 * `pickleModuleName()`), where `t` is the type tracker state with which the
 * reference was reached.
 */
private DataFlow::Node pickle(DataFlow::TypeTracker t) {
  // Base case: the import node for any of the pickle module names.
  result = DataFlow::importNode(pickleModuleName()) and
  t.start()
  or
  // Step case: extend an already tracked reference by one tracking step.
  exists(DataFlow::TypeTracker prev | pickle(prev).track(prev, t) = result)
}

/** Gets a reference to the `pickle` module (type tracking has reached its end state). */
DataFlow::Node pickle() { pickle(DataFlow::TypeTracker::end()) = result }

/** Provides models for the `pickle` module. */
module pickle {
  /**
   * Gets a reference to the `pickle.loads` function, where `t` is the type
   * tracker state with which the reference was reached.
   */
  private DataFlow::Node loads(DataFlow::TypeTracker t) {
    // Base case: the import node for `<pickle module name>.loads`.
    result = DataFlow::importNode(pickleModuleName() + ".loads") and
    t.start()
    or
    // Base case: tracking starts in the `loads` attribute of a `pickle` module reference.
    result = pickle() and
    t.startInAttr("loads")
    or
    // Step case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | loads(prev).track(prev, t) = result)
  }

  /** Gets a reference to the `pickle.loads` function (type tracking has reached its end state). */
  DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
}

/**
 * A call to `pickle.loads`, modeled as a decoding that may execute its input.
 * See https://docs.python.org/3/library/pickle.html#pickle.loads
 */
private class PickleLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  PickleLoadsCall() { pickle::loads().asCfgNode() = node.getFunction() }

  /** Always holds: every `pickle.loads` call is treated as able to execute its input. */
  override predicate mayExecuteInput() { any() }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call node itself. */
  override DataFlow::Node getOutput() { this = result }

  /** Gets the format identifier `"pickle"`. */
  override string getFormat() { "pickle" = result }
}

// ---------------------------------------------------------------------------
// popen2
// ---------------------------------------------------------------------------
Expand Down
98 changes: 98 additions & 0 deletions python/ql/src/experimental/semmle/python/frameworks/Yaml.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Provides classes modeling security-relevant aspects of the PyYAML package
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
*/

private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts

private module Yaml {
  /**
   * Gets a reference to the `yaml` module, where `t` is the type tracker
   * state with which the reference was reached.
   */
  private DataFlow::Node yaml(DataFlow::TypeTracker t) {
    t.start() and
    result = DataFlow::importNode("yaml")
    or
    exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
  }

  /** Gets a reference to the `yaml` module (type tracking has reached its end state). */
  DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }

  /** Provides models for the `yaml` module. */
  module yaml {
    /**
     * Gets a reference to the attribute `attr_name` of the `yaml` module,
     * where `t` is the type tracker state with which the reference was reached.
     * WARNING: Only holds for a few predefined attributes
     * (`load`, `SafeLoader` and `BaseLoader`).
     *
     * For example, using `attr_name = "load"` will get all uses of `yaml.load`.
     */
    private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
      // Base cases: either a direct import of `yaml.<attr_name>`, or an attribute
      // read of `attr_name` on a reference to the `yaml` module.
      attr_name in ["load", "SafeLoader", "BaseLoader"] and
      (
        t.start() and
        result = DataFlow::importNode("yaml." + attr_name)
        or
        t.startInAttr(attr_name) and
        result = yaml()
      )
      or
      // Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
      // we have inlined that code and forced a join
      exists(DataFlow::TypeTracker t2 |
        exists(DataFlow::StepSummary summary |
          yaml_attr_first_join(t2, attr_name, result, summary) and
          t = t2.append(summary)
        )
      )
    }

    /**
     * Holds if there is a step, summarized by `summary`, from a node given by
     * `yaml_attr(t2, attr_name)` to `res`.
     *
     * This is kept as a separate `pragma[nomagic]` predicate as part of the
     * performance workaround described in `yaml_attr`; do not inline it.
     */
    pragma[nomagic]
    private predicate yaml_attr_first_join(
      DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
    ) {
      DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
    }

    /**
     * Gets a reference to the attribute `attr_name` of the `yaml` module.
     * WARNING: Only holds for a few predefined attributes
     * (`load`, `SafeLoader` and `BaseLoader`).
     *
     * For example, using `attr_name = "load"` will get all uses of `yaml.load`.
     */
    DataFlow::Node yaml_attr(string attr_name) {
      result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
    }
  }
}

/**
 * A call to `yaml.load`.
 * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
 *
 * The data to parse may be passed as the first positional argument or as the
 * `stream` keyword argument. The loader may be passed as the second positional
 * argument or as the `Loader` keyword argument.
 */
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  YamlLoadCall() { node.getFunction() = Yaml::yaml::yaml_attr("load").asCfgNode() }

  /**
   * Holds unless the call explicitly passes `SafeLoader` or `BaseLoader` as its loader.
   *
   * This function was thought safe from the 5.1 release, when the default loader was
   * changed to `FullLoader`. In 2020 new exploits were found, meaning it's not safe.
   * The current plan is to change the default to `SafeLoader` in release 6.0
   * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
   * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
   * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
   */
  override predicate mayExecuteInput() {
    // If the loader is not set to either `SafeLoader` or `BaseLoader`, or is not set at all,
    // then a loader that is not known to be safe will be used.
    // The loader can be given positionally (`yaml.load(data, yaml.SafeLoader)`) or
    // by keyword (`yaml.load(data, Loader=yaml.SafeLoader)`), so both must be checked.
    not exists(ControlFlowNode loader |
      loader = node.getArg(1)
      or
      loader = node.getArgByName("Loader")
    |
      loader = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]).asCfgNode()
    )
  }

  /** Gets the argument holding the YAML data (positional index 0 or keyword `stream`). */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() = node.getArg(0)
    or
    // `yaml.load(stream=data, ...)`
    result.asCfgNode() = node.getArgByName("stream")
  }

  /** Gets the call node itself, which holds the loaded object. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the format identifier `"YAML"`. */
  override string getFormat() { result = "YAML" }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest
Loading