diff --git a/change-notes/1.25/analysis-python.md b/change-notes/1.25/analysis-python.md index 5d0fc69ec808..7a9c437a093c 100644 --- a/change-notes/1.25/analysis-python.md +++ b/change-notes/1.25/analysis-python.md @@ -20,3 +20,4 @@ The following changes in version 1.25 affect Python analysis in all applications ## Changes to libraries * Importing `semmle.python.web.HttpRequest` will no longer import `UntrustedStringKind` transitively. `UntrustedStringKind` is the most commonly used non-abstract subclass of `ExternalStringKind`. If not imported (by one mean or another), taint-tracking queries that concern `ExternalStringKind` will not produce any results. Please ensure such queries contain an explicit import (`import semmle.python.security.strings.Untrusted`). +* Added support for tainted f-strings. diff --git a/python/ql/src/semmle/python/security/strings/Basic.qll b/python/ql/src/semmle/python/security/strings/Basic.qll index 345f7d0b82b2..05d46728686b 100755 --- a/python/ql/src/semmle/python/security/strings/Basic.qll +++ b/python/ql/src/semmle/python/security/strings/Basic.qll @@ -27,7 +27,8 @@ abstract class StringKind extends TaintKind { os_path_join(fromnode, tonode) or str_format(fromnode, tonode) or encode_decode(fromnode, tonode) or - to_str(fromnode, tonode) + to_str(fromnode, tonode) or + f_string(fromnode, tonode) ) or result = this and copy_call(fromnode, tonode) @@ -61,13 +62,13 @@ private class StringEqualitySanitizer extends Sanitizer { } } -/* tonode = ....format(fromnode) */ +/** tonode = ....format(fromnode) */ private predicate str_format(ControlFlowNode fromnode, CallNode tonode) { tonode.getFunction().(AttrNode).getName() = "format" and tonode.getAnArg() = fromnode } -/* tonode = codec.[en|de]code(fromnode)*/ +/** tonode = codec.[en|de]code(fromnode) */ private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) { exists(FunctionObject func, string name | not func.getFunction().isMethod() and @@ -81,7 +82,7 @@ private predicate encode_decode(ControlFlowNode fromnode, CallNode tonode) { ) } -/* tonode = str(fromnode)*/ +/** tonode = str(fromnode) */ private predicate to_str(ControlFlowNode fromnode, CallNode tonode) { tonode.getAnArg() = fromnode and ( @@ -91,7 +92,7 @@ private predicate to_str(ControlFlowNode fromnode, CallNode tonode) { ) } -/* tonode = fromnode[:] */ +/** tonode = fromnode[:] */ private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) { exists(Slice all | all = tonode.getIndex().getNode() and @@ -101,12 +102,17 @@ private predicate slice(ControlFlowNode fromnode, SubscriptNode tonode) { ) } -/* tonode = os.path.join(..., fromnode, ...) */ +/** tonode = os.path.join(..., fromnode, ...) */ private predicate os_path_join(ControlFlowNode fromnode, CallNode tonode) { tonode = Value::named("os.path.join").getACall() and tonode.getAnArg() = fromnode } +/** tonode = f"... {fromnode} ..." */ +private predicate f_string(ControlFlowNode fromnode, ControlFlowNode tonode) { + tonode.getNode().(Fstring).getAValue() = fromnode.getNode() +} + /** * A kind of "taint", representing a dictionary mapping str->"taint" * diff --git a/python/ql/test/3/library-tests/taint/strings/Taint.qll b/python/ql/test/3/library-tests/taint/strings/Taint.qll new file mode 100644 index 000000000000..3368a1c4f709 --- /dev/null +++ b/python/ql/test/3/library-tests/taint/strings/Taint.qll @@ -0,0 +1,44 @@ +import python +import semmle.python.dataflow.TaintTracking +import semmle.python.security.strings.Untrusted +import semmle.python.security.Exceptions + +class SimpleSource extends TaintSource { + SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind } + + override string toString() { result = "taint source" } +} + +class ListSource extends TaintSource { + ListSource() { this.(NameNode).getId() = "TAINTED_LIST" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind } + + override string toString() { result = "list taint source" } +} + +class DictSource extends TaintSource { + DictSource() { this.(NameNode).getId() = "TAINTED_DICT" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind } + + override string toString() { result = "dict taint source" } +} + +class ExceptionInfoSource extends TaintSource { + ExceptionInfoSource() { this.(NameNode).getId() = "TAINTED_EXCEPTION_INFO" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExceptionInfo } + + override string toString() { result = "Exception info source" } +} + +class ExternalFileObjectSource extends TaintSource { + ExternalFileObjectSource() { this.(NameNode).getId() = "TAINTED_FILE" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalFileObject } + + override string toString() { result = "Tainted file source" } +} diff --git a/python/ql/test/3/library-tests/taint/strings/TestTaint.expected b/python/ql/test/3/library-tests/taint/strings/TestTaint.expected new file mode 100644 index 000000000000..3e5e988f8258 --- /dev/null +++ b/python/ql/test/3/library-tests/taint/strings/TestTaint.expected @@ -0,0 +1 @@ +| test.py:4 | ok | fstring | Fstring | externally controlled string | diff --git a/python/ql/test/3/library-tests/taint/strings/TestTaint.ql b/python/ql/test/3/library-tests/taint/strings/TestTaint.ql new file mode 100644 index 000000000000..7b9c6025c9df --- /dev/null +++ b/python/ql/test/3/library-tests/taint/strings/TestTaint.ql @@ -0,0 +1,33 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.web.HttpRequest +import semmle.python.security.strings.Untrusted +import Taint + +from + Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res, + string taint_string +where + call.getLocation().getFile().getShortName() = "test.py" and + ( + call.getFunc().(Name).getId() = "ensure_tainted" and + expected_taint = true + or + call.getFunc().(Name).getId() = "ensure_not_tainted" and + expected_taint = false + ) and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "" and + has_taint = false + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) and + has_taint = true + ) and + if expected_taint = has_taint then test_res = "ok " else test_res = "fail" +// if expected_taint = has_taint then test_res = "✓" else test_res = "✕" +select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), arg.toString(), + taint_string diff --git a/python/ql/test/3/library-tests/taint/strings/test.py b/python/ql/test/3/library-tests/taint/strings/test.py new file mode 100644 index 000000000000..e1c25b5dcccf --- /dev/null +++ b/python/ql/test/3/library-tests/taint/strings/test.py @@ -0,0 +1,5 @@ +def fstring(): + tainted_string = TAINTED_STRING + ensure_tainted( + f"foo {tainted_string} bar" + )