Skip to content

Python: Use TUnknown as the result of calls to methods with unknown return types #2915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
68 changes: 48 additions & 20 deletions python/ql/src/semmle/python/objects/Callables.qll
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,10 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc
cls = ObjectInternal::builtin("bool") and obj = ObjectInternal::bool(_)
) and
origin = CfgOrigin::unknown()
or
this.returnTypeUnknown() and
obj = ObjectInternal::unknown() and
origin = CfgOrigin::unknown()
}

override ControlFlowNode getOrigin() {
Expand All @@ -231,26 +235,15 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc

Builtin getReturnType() {
exists(Builtin func |
func = this.getBuiltin() |
/* Enumerate the types of a few builtin functions, that the CPython analysis misses. */
func = Builtin::builtin("hex") and result = Builtin::special("str")
or
func = Builtin::builtin("oct") and result = Builtin::special("str")
or
func = Builtin::builtin("intern") and result = Builtin::special("str")
or
func = Builtin::builtin("__import__") and result = Builtin::special("ModuleType")
or
/* Fix a few minor inaccuracies in the CPython analysis */
ext_rettype(func, result) and not (
func = Builtin::builtin("__import__")
or
func = Builtin::builtin("compile") and result = Builtin::special("NoneType")
or
func = Builtin::builtin("sum")
or
func = Builtin::builtin("filter")
)
func = this.getBuiltin() and
result = getBuiltinFunctionReturnType(func)
)
}

/** Holds if `getBuiltinFunctionReturnType` has no result for a builtin underlying this object. */
private predicate returnTypeUnknown() {
    exists(Builtin func |
        func = this.getBuiltin()
        |
        not exists(Builtin known | known = getBuiltinFunctionReturnType(func))
    )
}

Expand Down Expand Up @@ -293,7 +286,30 @@ class BuiltinFunctionObjectInternal extends CallableObjectInternal, TBuiltinFunc

}

/**
 * Gets a return type for the builtin function `func`.
 *
 * The result comes either from a short hand-written table or from the
 * `ext_rettype` relation, with a few of the latter's entries filtered out.
 */
private Builtin getBuiltinFunctionReturnType(Builtin func) {
    /* Hand-written entries for a few builtin functions that the CPython analysis misses. */
    exists(string name |
        name = "hex" or name = "oct" or name = "intern"
        |
        func = Builtin::builtin(name)
    ) and
    result = Builtin::special("str")
    or
    func = Builtin::builtin("__import__") and
    result = Builtin::special("ModuleType")
    or
    /* Otherwise take the recorded type, dropping a few minor inaccuracies in the CPython analysis. */
    ext_rettype(func, result) and
    not (func = Builtin::builtin("__import__")) and
    not (func = Builtin::builtin("compile") and result = Builtin::special("NoneType")) and
    not (func = Builtin::builtin("sum")) and
    not (func = Builtin::builtin("filter"))
}

/** Class representing methods of built-in classes (otherwise known as method-descriptors) such as `list.append`. */

class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethodObject {

override Builtin getBuiltin() {
Expand Down Expand Up @@ -328,15 +344,27 @@ class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethod
cls = ObjectInternal::builtin("bool") and obj = ObjectInternal::bool(_)
) and
origin = CfgOrigin::unknown()
or
this.returnTypeUnknown() and
obj = ObjectInternal::unknown() and
origin = CfgOrigin::unknown()
}

/** Gets a return type that `ext_rettype` records for the builtin underlying this method. */
Builtin getReturnType() {
    /* The stubs are the only source consulted here: no record means no result. */
    ext_rettype(this.getBuiltin(), result)
}

/** Holds if `ext_rettype` has no entry at all for a builtin underlying this method. */
private predicate returnTypeUnknown() {
    exists(Builtin func |
        func = this.getBuiltin() and
        not ext_rettype(func, _)
    )
}

/** Gets the control-flow origin of this object; this override never has a result. */
override ControlFlowNode getOrigin() {
none()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
| 112 | multi_return | builtin-class int |
| 118 | do_something | builtin-class int |
| 123 | with_flow | builtin-class int |
| 128 | return_default | builtin-class tuple |
| 128 | return_default | builtin-class tuple |
2 changes: 2 additions & 0 deletions python/ql/test/library-tests/PointsTo/new/Call.expected
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
| l_calls.py:51 | ControlFlowNode for g() | g |
| l_calls.py:52 | ControlFlowNode for Attribute() | F.m |
| l_calls.py:53 | ControlFlowNode for Attribute() | F.m |
| l_calls.py:59 | ControlFlowNode for Attribute() | int.bit_length |
| l_calls.py:63 | ControlFlowNode for Attribute() | dict.get |
| q_super.py:4 | ControlFlowNode for Attribute() | object.__init__ |
| q_super.py:12 | ControlFlowNode for Attribute() | Base2.__init__ |
| q_super.py:22 | ControlFlowNode for Attribute() | Base1.meth |
Expand Down
2 changes: 2 additions & 0 deletions python/ql/test/library-tests/PointsTo/new/NameSpace.expected
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@
| l_calls.py:0 | Module code.l_calls | E | class E |
| l_calls.py:0 | Module code.l_calls | F | class F |
| l_calls.py:0 | Module code.l_calls | Owner | class Owner |
| l_calls.py:0 | Module code.l_calls | a | Builtin-method bit_length |
| l_calls.py:0 | Module code.l_calls | bar | Function bar |
| l_calls.py:0 | Module code.l_calls | c | Builtin-method get |
| l_calls.py:0 | Module code.l_calls | f | Function f |
| l_calls.py:0 | Module code.l_calls | foo | Function foo |
| l_calls.py:0 | Module code.l_calls | g | Function g |
Expand Down
Empty file.
26 changes: 26 additions & 0 deletions python/ql/test/library-tests/PointsTo/new/PointsToMissing.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import python
import Util
import semmle.python.pointsto.PointsTo
import semmle.python.objects.ObjectInternal

/* This test should return _no_ results. */

/**
 * Holds if `n` is a node that the test expects to have a points-to result.
 *
 * A node is relevant when it is an argument of a call to a function referred
 * to by the name `check`, or when it is not a name store and the location of
 * its enclosing scope shares its file path and second `hasLocationInfo`
 * argument (presumably the start line) with a comment whose text ends in `check`.
 */
predicate relevant_node(ControlFlowNode n) {
    /* Arguments passed to `check(...)`. */
    exists(CallNode call |
        call.getFunction().(NameNode).getId() = "check"
        |
        n = call.getAnArg()
    )
    or
    /* Nodes in a scope that is marked by a trailing `check` comment. */
    exists(Comment marker, string path, int line |
        marker.getText().matches("%check") and
        marker.getLocation().hasLocationInfo(path, line, _, _, _) and
        n.getNode().getScope().getLocation().hasLocationInfo(path, line, _, _, _) and
        not n.(NameNode).isStore()
    )
}

/*
 * Report each relevant node for which `PointsTo::pointsTo` holds for no
 * combination of its remaining arguments, i.e. a node with no points-to result.
 */
from ControlFlowNode f
where
relevant_node(f) and
not PointsTo::pointsTo(f, _, _, _)
select locate(f.getLocation(), "abchlr"), f.toString()
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,25 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P
| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 | import |
| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 | import |
| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 | import |
| l_calls.py:58 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
| l_calls.py:58 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
| l_calls.py:58 | ControlFlowNode for int | builtin-class int | builtin-class type | 58 | import |
| l_calls.py:59 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 59 | import |
| l_calls.py:59 | ControlFlowNode for Attribute() | Attribute() | builtin-class int | 59 | import |
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 59 | import |
| l_calls.py:59 | ControlFlowNode for b | Attribute() | builtin-class int | 59 | import |
| l_calls.py:59 | ControlFlowNode for int | builtin-class int | builtin-class type | 59 | import |
| l_calls.py:62 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 62 | import |
| l_calls.py:62 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 | import |
| l_calls.py:62 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 62 | import |
| l_calls.py:63 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 63 | import |
| l_calls.py:63 | ControlFlowNode for Dict | Dict | builtin-class dict | 63 | import |
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 63 | import |
| l_calls.py:63 | ControlFlowNode for Str | 'foo' | builtin-class str | 63 | import |
| l_calls.py:63 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 63 | import |
| l_calls.py:64 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 | import |
| l_calls.py:64 | ControlFlowNode for b | Attribute() | builtin-class int | 59 | import |
| l_calls.py:64 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 | import |
| m_attributes.py:3 | ControlFlowNode for C | class C | builtin-class type | 3 | import |
| m_attributes.py:3 | ControlFlowNode for ClassExpr | class C | builtin-class type | 3 | import |
| m_attributes.py:3 | ControlFlowNode for object | builtin-class object | builtin-class type | 3 | import |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,25 @@ WARNING: Predicate points_to has been deprecated and may be removed in future (P
| l_calls.py:53 | ControlFlowNode for Attribute() | 'b' | builtin-class str | 53 |
| l_calls.py:53 | ControlFlowNode for F | class F | builtin-class type | 47 |
| l_calls.py:53 | ControlFlowNode for t | Tuple | builtin-class tuple | 41 |
| l_calls.py:58 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 58 |
| l_calls.py:58 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 |
| l_calls.py:58 | ControlFlowNode for int | builtin-class int | builtin-class type | 58 |
| l_calls.py:59 | ControlFlowNode for Attribute | Builtin-method bit_length | builtin-class method_descriptor | 59 |
| l_calls.py:59 | ControlFlowNode for Attribute() | Attribute() | builtin-class int | 59 |
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 59 |
| l_calls.py:59 | ControlFlowNode for b | Attribute() | builtin-class int | 59 |
| l_calls.py:59 | ControlFlowNode for int | builtin-class int | builtin-class type | 59 |
| l_calls.py:62 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 62 |
| l_calls.py:62 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 |
| l_calls.py:62 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 62 |
| l_calls.py:63 | ControlFlowNode for Attribute | Builtin-method get | builtin-class method_descriptor | 63 |
| l_calls.py:63 | ControlFlowNode for Dict | Dict | builtin-class dict | 63 |
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | int 5 | builtin-class int | 63 |
| l_calls.py:63 | ControlFlowNode for Str | 'foo' | builtin-class str | 63 |
| l_calls.py:63 | ControlFlowNode for dict | builtin-class dict | builtin-class type | 63 |
| l_calls.py:64 | ControlFlowNode for a | Builtin-method bit_length | builtin-class method_descriptor | 58 |
| l_calls.py:64 | ControlFlowNode for b | Attribute() | builtin-class int | 59 |
| l_calls.py:64 | ControlFlowNode for c | Builtin-method get | builtin-class method_descriptor | 62 |
| s_scopes.py:4 | ControlFlowNode for True | bool True | builtin-class bool | 4 |
| s_scopes.py:4 | ControlFlowNode for float | bool True | builtin-class bool | 4 |
| s_scopes.py:7 | ControlFlowNode for C2 | class C2 | builtin-class type | 7 |
Expand Down
16 changes: 16 additions & 0 deletions python/ql/test/library-tests/PointsTo/new/Values.expected
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,22 @@
| l_calls.py:53 | ControlFlowNode for Attribute() | import | 'b' | builtin-class str |
| l_calls.py:53 | ControlFlowNode for F | import | class F | builtin-class type |
| l_calls.py:53 | ControlFlowNode for t | import | ('a', 'b', 'c', ) | builtin-class tuple |
| l_calls.py:58 | ControlFlowNode for Attribute | import | builtin method bit_length | builtin-class method_descriptor |
| l_calls.py:58 | ControlFlowNode for int | import | builtin-class int | builtin-class type |
| l_calls.py:59 | ControlFlowNode for Attribute | import | builtin method bit_length | builtin-class method_descriptor |
| l_calls.py:59 | ControlFlowNode for Attribute() | import | instance of int | builtin-class int |
| l_calls.py:59 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int |
| l_calls.py:59 | ControlFlowNode for int | import | builtin-class int | builtin-class type |
| l_calls.py:62 | ControlFlowNode for Attribute | import | builtin method get | builtin-class method_descriptor |
| l_calls.py:62 | ControlFlowNode for dict | import | builtin-class dict | builtin-class type |
| l_calls.py:63 | ControlFlowNode for Attribute | import | builtin method get | builtin-class method_descriptor |
| l_calls.py:63 | ControlFlowNode for Dict | import | Dict | builtin-class dict |
| l_calls.py:63 | ControlFlowNode for IntegerLiteral | import | int 5 | builtin-class int |
| l_calls.py:63 | ControlFlowNode for Str | import | 'foo' | builtin-class str |
| l_calls.py:63 | ControlFlowNode for dict | import | builtin-class dict | builtin-class type |
| l_calls.py:64 | ControlFlowNode for a | import | builtin method bit_length | builtin-class method_descriptor |
| l_calls.py:64 | ControlFlowNode for b | import | instance of int | builtin-class int |
| l_calls.py:64 | ControlFlowNode for c | import | builtin method get | builtin-class method_descriptor |
| m_attributes.py:3 | ControlFlowNode for ClassExpr | import | class C | builtin-class type |
| m_attributes.py:3 | ControlFlowNode for object | import | builtin-class object | builtin-class type |
| m_attributes.py:5 | ControlFlowNode for FunctionExpr | import | Function C.__init__ | builtin-class function |
Expand Down
10 changes: 10 additions & 0 deletions python/ql/test/library-tests/PointsTo/new/code/l_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,13 @@ def m(self, x, y, z=None):
F().m(*t)
F.m(*t)

# Calls to built-in methods

# Methods with a known return type.
a = int.bit_length
b = int.bit_length(5)

# Methods without a known return type.
c = dict.get
d = dict.get({"foo":5}, 5)
check(a,b,c,d)