Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 196 additions & 41 deletions python/ql/src/semmle/python/security/strings/External.qll
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,40 @@ import python
import Basic
private import Common

/** An extensible kind of taint representing an externally controlled string.
/**
* An extensible kind of taint representing an externally controlled string.
*/
abstract class ExternalStringKind extends StringKind {

bindingset[this]
ExternalStringKind() {
this = this
}
ExternalStringKind() { this = this }

override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
result = StringKind.super.getTaintForFlowStep(fromnode, tonode)
or
tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this
tonode.(SequenceNode).getElement(_) = fromnode and
result.(ExternalStringSequenceKind).getItem() = this
or
json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
or
tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
or
urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
or
urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
}

}

/** A kind of "taint", representing a sequence, with a "taint" member */
class ExternalStringSequenceKind extends SequenceKind {

ExternalStringSequenceKind() {
this.getItem() instanceof ExternalStringKind
}

ExternalStringSequenceKind() { this.getItem() instanceof ExternalStringKind }
}

/** An hierachical dictionary or list where the entire structure is externally controlled
/**
* A hierarchical dictionary or list where the entire structure is externally controlled.
* This is typically a parsed JSON object.
*/
class ExternalJsonKind extends TaintKind {

ExternalJsonKind() {
this = "json[" + any(ExternalStringKind key) + "]"
}

ExternalJsonKind() { this = "json[" + any(ExternalStringKind key) + "]" }

/** Gets the taint kind for item in this sequence */
TaintKind getValue() {
Expand All @@ -54,65 +49,225 @@ class ExternalJsonKind extends TaintKind {
json_subscript_taint(tonode, fromnode, this, result)
or
result = this and copy_call(fromnode, tonode)
}
}

override TaintKind getTaintOfMethodResult(string name) {
name = "get" and result = this.getValue()
}

}
}

/**
 * A kind of "taint" for dictionaries mapping str->"taint", where the value
 * kind is an externally controlled string kind.
 */
class ExternalStringDictKind extends DictKind {
  ExternalStringDictKind() {
    exists(ExternalStringKind valueKind | valueKind = this.getValue())
  }
}

ExternalStringDictKind() {
this.getValue() instanceof ExternalStringKind
/**
 * A kind of "taint" for dictionaries whose keys are strings and whose values
 * are sequences of tainted strings.
 */
class ExternalStringSequenceDictKind extends DictKind {
  ExternalStringSequenceDictKind() {
    exists(ExternalStringSequenceKind valueKind | valueKind = this.getValue())
  }
}

/** The taint kind attached to the value returned by `urlsplit(tainted_string)`. */
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
  // Attribute and method names follow
  // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
  override TaintKind getTaintOfMethodResult(string name) {
    // `geturl()` yields an externally controlled string
    (result instanceof ExternalStringKind and name = "geturl")
    or
    result = super.getTaintOfMethodResult(name)
  }

  override TaintKind getTaintOfAttribute(string name) {
    result instanceof ExternalStringKind and
    (
      // fields of the namedtuple
      name = "scheme"
      or
      name = "netloc"
      or
      name = "path"
      or
      name = "query"
      or
      name = "fragment"
      or
      // additional attributes provided by the result class
      name = "username"
      or
      name = "password"
      or
      name = "hostname"
    )
    or
    result = super.getTaintOfAttribute(name)
  }
}

/** A kind of "taint", representing a dictionary mapping strings to sequences of
* tainted strings */
/** TaintKind for the result of `urlparse(tainted_string)` */
class ExternalUrlParseResult extends ExternalStringSequenceKind {
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
override TaintKind getTaintOfAttribute(string name) {
result = super.getTaintOfAttribute(name)
or
(
// namedtuple field names
name = "scheme" or
name = "netloc" or
name = "path" or
name = "params" or
name = "query" or
name = "fragment" or
// class methods
name = "username" or
name = "password" or
name = "hostname"
) and
result instanceof ExternalStringKind
}

class ExternalStringSequenceDictKind extends DictKind {
ExternalStringSequenceDictKind() {
this.getValue() instanceof ExternalStringSequenceKind
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "geturl" and
result instanceof ExternalStringKind
}
}

/* Helper for getTaintForStep() */
pragma [noinline]
private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) {
pragma[noinline]
private predicate json_subscript_taint(
SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key
) {
sub.isLoad() and
sub.getValue() = obj and
key = seq.getValue()
}


private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
exists(FunctionObject json_loads |
ModuleObject::named("json").attr("loads") = json_loads and
json_loads.getACall() = tonode and tonode.getArg(0) = fromnode
json_loads.getACall() = tonode and
tonode.getArg(0) = fromnode
)
}

/** A kind of "taint", representing an open file-like object from an external source. */
class ExternalFileObject extends TaintKind {
/**
 * Holds if `tonode` is a call to `urlsplit` (reached through `six.moves.urllib.parse`,
 * the Python 2 `urlparse` module, or the Python 3 `urllib.parse` module) and
 * `fromnode` is the argument at index 0 of that call.
 */
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
  // The target is looked up with `Value::named`, including an explicit `six.moves`
  // entry, instead of `exists(FunctionValue ...`: the latter would make the tests
  // import +100 modules, so this slightly altered version is kept for now.
  exists(Value func |
    tonode.getArg(0) = fromnode and
    tonode = func.getACall()
  |
    // Python 3
    func = Value::named("urllib.parse.urlsplit")
    or
    // Python 2
    func = Value::named("urlparse.urlsplit")
    or
    func = Value::named("six.moves.urllib.parse.urlsplit")
  )
}

ExternalFileObject() {
this = "file[" + any(ExternalStringKind key) + "]"
}
/**
 * Holds if `tonode` is a call to `urlparse` (reached through `six.moves.urllib.parse`,
 * the Python 2 `urlparse` module, or the Python 3 `urllib.parse` module) and
 * `fromnode` is the argument at index 0 of that call.
 */
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
  // The target is looked up with `Value::named`, including an explicit `six.moves`
  // entry, instead of `exists(FunctionValue ...`: the latter would make the tests
  // import +100 modules, so this slightly altered version is kept for now.
  exists(Value func |
    tonode.getArg(0) = fromnode and
    tonode = func.getACall()
  |
    // Python 3
    func = Value::named("urllib.parse.urlparse")
    or
    // Python 2
    func = Value::named("urlparse.urlparse")
    or
    func = Value::named("six.moves.urllib.parse.urlparse")
  )
}

/** A kind of "taint", representing an open file-like object from an external source. */
class ExternalFileObject extends TaintKind {
ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }

/** Gets the taint kind for the contents of this file */
TaintKind getValue() {
this = "file[" + result + "]"
}
TaintKind getValue() { this = "file[" + result + "]" }

override TaintKind getTaintOfMethodResult(string name) {
name = "read" and result = this.getValue()
}

}

/**
 * Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until
 * we have better support for namedtuples.
 *
 * Will clear **all** taint on a test of the kind. That is, on the sanitizing edge of any matching test,
 * all fields/indexes will be cleared of taint.
 *
 * Handles (sanitizing on the `true` edge):
 * - `if splitres.netloc == "KNOWN_VALUE"`
 * - `if splitres[0] == "KNOWN_VALUE"`
 * - `if splitres.netloc in ["KNOWN_VALUE", ...]`
 *
 * The negated forms (`!=`, `not in`, and `not <test>`) sanitize on the opposite edge.
 */
class UrlsplitUrlparseTempSanitizer extends Sanitizer {
  // TODO: remove this once we have better support for named tuples
  UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" }

  override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
    (
      taint instanceof ExternalUrlSplitResult
      or
      taint instanceof ExternalUrlParseResult
    ) and
    exists(ControlFlowNode full_use |
      // `full_use` is a subscript or attribute access on the variable refined by `test`
      full_use.(SubscriptNode).getObject() = test.getInput().getAUse()
      or
      full_use.(AttrNode).getObject() = test.getInput().getAUse()
    |
      clears_taint(full_use, test.getTest(), test.getSense())
    )
  }

  /**
   * Holds if `test`, when it evaluates to `sense`, establishes that `tainted` equals
   * (or is contained in a sequence of) string constants.
   *
   * The comparison predicates are applied to `test` itself, so only the guard that is
   * actually being refined can clear taint; `not` is unwrapped by flipping `sense`.
   */
  private predicate clears_taint(ControlFlowNode tainted, ControlFlowNode test, boolean sense) {
    test_equality_with_const(test, tainted, sense)
    or
    test_in_const_seq(test, tainted, sense)
    or
    test.(UnaryExprNode).getNode().getOp() instanceof Not and
    exists(ControlFlowNode nested_test |
      nested_test = test.(UnaryExprNode).getOperand() and
      clears_taint(tainted, nested_test, sense.booleanNot())
    )
  }

  /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
  private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
    exists(ControlFlowNode const, Cmpop op | const.getNode() instanceof StrConst |
      (
        // the constant may appear on either side of the comparison
        cmp.operands(const, op, tainted)
        or
        cmp.operands(tainted, op, const)
      ) and
      (
        op instanceof Eq and sense = true
        or
        op instanceof NotEq and sense = false
      )
    )
  }

  /** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
  private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
    exists(SequenceNode const_seq, Cmpop op |
      // every element of the sequence must be a string constant
      forall(ControlFlowNode elem | elem = const_seq.getAnElement() |
        elem.getNode() instanceof StrConst
      )
    |
      cmp.operands(tainted, op, const_seq) and
      (
        op instanceof In and sense = true
        or
        op instanceof NotIn and sense = false
      )
    )
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:21 | Pi(urlsplit_res_0) [true] |
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:24 | Pi(urlsplit_res_3) [true] |
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:27 | Pi(urlsplit_res_6) [true] |
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:30 | Pi(urlsplit_res_9) [true] |
| string equality sanitizer | externally controlled string | test.py:21 | Pi(urlsplit_res_0) [true] |
| string equality sanitizer | externally controlled string | test.py:24 | Pi(urlsplit_res_3) [true] |
| string equality sanitizer | externally controlled string | test.py:27 | Pi(urlsplit_res_6) [true] |
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import python
import Taint

// Lists every sanitizer / taint kind / edge refinement triple for which
// `sanitizingEdge` holds, reporting the location of the refinement's test and
// the refinement's representation.
from Sanitizer s, TaintKind taint, PyEdgeRefinement test
where s.sanitizingEdge(taint, test)
select s, taint, test.getTest().getLocation().toString(), test.getRepresentation()
47 changes: 47 additions & 0 deletions python/ql/test/library-tests/taint/namedtuple/Taint.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import python
import semmle.python.security.TaintTracking
import semmle.python.security.strings.Untrusted

/** A test taint source: any name node with the identifier `TAINTED_STRING`. */
class SimpleSource extends TaintSource {
  SimpleSource() { exists(NameNode node | node = this | node.getId() = "TAINTED_STRING") }

  override string toString() { result = "taint source" }

  /** Holds for every externally controlled string kind. */
  override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind }
}

/** A test taint source: any name node with the identifier `TAINTED_LIST`. */
class ListSource extends TaintSource {
  ListSource() { exists(NameNode node | node = this | node.getId() = "TAINTED_LIST") }

  override string toString() { result = "list taint source" }

  /** Holds for every sequence kind whose items are externally controlled strings. */
  override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
}

/** A test taint source: any name node with the identifier `TAINTED_DICT`. */
class DictSource extends TaintSource {
  DictSource() { exists(NameNode node | node = this | node.getId() = "TAINTED_DICT") }

  override string toString() { result = "dict taint source" }

  /** Holds for every dictionary kind whose values are externally controlled strings. */
  override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind }
}

/**
 * The taint-tracking configuration for these tests: the three name-based test
 * sources, no sinks, and the temporary `urlsplit`/`urlparse` sanitizer.
 */
class TestConfig extends TaintTracking::Configuration {
  TestConfig() { this = "TestConfig" }

  override predicate isSource(TaintTracking::Source source) {
    source instanceof DictSource or
    source instanceof ListSource or
    source instanceof SimpleSource
  }

  /** No sinks are defined for this configuration. */
  override predicate isSink(TaintTracking::Sink sink) { none() }

  override predicate isSanitizer(Sanitizer sanitizer) {
    sanitizer instanceof UrlsplitUrlparseTempSanitizer
  }
}
15 changes: 15 additions & 0 deletions python/ql/test/library-tests/taint/namedtuple/TestTaint.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
| test.py:13 | test_basic | a | externally controlled string |
| test.py:13 | test_basic | b | externally controlled string |
| test.py:13 | test_basic | c | externally controlled string |
| test.py:13 | test_basic | d | externally controlled string |
| test.py:13 | test_basic | urlsplit_res | [externally controlled string] |
| test.py:19 | test_sanitizer | Attribute | externally controlled string |
| test.py:22 | test_sanitizer | Attribute | NO TAINT |
| test.py:25 | test_sanitizer | Subscript | NO TAINT |
| test.py:28 | test_sanitizer | Attribute | NO TAINT |
| test.py:31 | test_sanitizer | Attribute | NO TAINT |
| test.py:34 | test_sanitizer | Attribute | externally controlled string |
| test.py:44 | test_namedtuple | a | NO TAINT |
| test.py:44 | test_namedtuple | b | NO TAINT |
| test.py:44 | test_namedtuple | c | NO TAINT |
| test.py:44 | test_namedtuple | d | NO TAINT |
Loading