From dae38ce61bd874b41db7fe396bdaf81e5067b405 Mon Sep 17 00:00:00 2001 From: Maxime Arthaud Date: Wed, 5 May 2021 18:08:45 -0700 Subject: [PATCH] Disallow modeling attributes as callables Summary: We currently allow to model attributes as callables, using the `def foo(): ...` syntax in pysa models. This is very confusing and actually leads to non-determinism in the analysis, since attribute models are treated as callable models, but attributes are not part of the call graph and dependency graph. To prevent this, we disallow modeling attributes as callables. We can still use the `foo: TaintSource[Test]` syntax for attributes. Reviewed By: dkgi Differential Revision: D28169809 fbshipit-source-id: f49b82d387e18498f4abf86885c562653fe7361d --- documentation/website/docs/pysa_advanced.md | 42 ------ .../taint/modelParser.ml | 134 +++++++----------- .../taint/modelVerificationError.ml | 11 ++ .../taint/modelVerificationError.mli | 2 + .../integration/dataclass_taint.py.models | 3 +- .../test/integration/dataclass_taint.py.pysa | 2 +- .../taint/test/modelTest.ml | 40 ++++++ .../django_rest_framework.pysa | 8 ++ .../flask_sources_sinks.pysa | 3 + 9 files changed, 119 insertions(+), 126 deletions(-) diff --git a/documentation/website/docs/pysa_advanced.md b/documentation/website/docs/pysa_advanced.md index 2204db92154..c33808788f1 100644 --- a/documentation/website/docs/pysa_advanced.md +++ b/documentation/website/docs/pysa_advanced.md @@ -6,48 +6,6 @@ sidebar_label: Advanced Topics This page documents less straightforward bits of Pysa. -## Annotating `dataclass` Models - -In Pysa, [`dataclasses`](https://docs.python.org/3/library/dataclasses.html?) -are defined via attributes, which are converted to properties under the hood. If -you want to taint the attributes of a `dataclass`, you might try to do the -following: - -```python -# tainted.py -@dataclass(frozen=True) -class MyDataClass: - attribute: str = "" -``` - - -```python -# stubs/taint/tainted.py.pysa -# This won't work -tainted.MyDataClass.attribute: TaintSource[SensitiveData] -``` - -This doesn't work, because during analysis Pysa's understanding of the data -class is of how the class looks after the property is expanded; that is: - -```python -# Pysa's view of tainted.py -class MyDataClass: - @property - def attribute(self) -> str: ... - @attribute.setter - def attribute(self, value) -> None: ... -``` - -Therefore, to annotate a `dataclass` attribute, you can use the `@property` -annotations: - -```python -# stubs/taint/tainted.py.pysa -@property -def tainted.MyDataClass.attribute(self) -> TaintSource[SensitiveData]: ... -``` - ## Tainting Specific `kwargs` Sometimes, a function can have potential sinks mixed together with benign diff --git a/source/interprocedural_analyses/taint/modelParser.ml b/source/interprocedural_analyses/taint/modelParser.ml index 9ed26cf1b86..654729d2029 100644 --- a/source/interprocedural_analyses/taint/modelParser.ml +++ b/source/interprocedural_analyses/taint/modelParser.ml @@ -1590,7 +1590,7 @@ type model_or_query = | Model of (Model.t * Reference.t option) | Query of ModelQuery.rule -let callable_annotation +let resolve_global_callable ~path ~location ~verify_decorators @@ -1598,20 +1598,10 @@ let callable_annotation ({ Define.Signature.name = { Node.value = name; _ }; decorators; _ } as define) = (* Since properties and setters share the same undecorated name, we need to special-case them. *) - let global_resolution = Resolution.global_resolution resolution in - let global_type () = - match GlobalResolution.global global_resolution name with - | Some { AttributeResolution.Global.undecorated_signature; annotation; _ } -> ( - match undecorated_signature with - | Some signature -> Type.Callable signature |> Annotation.create - | None -> annotation ) - | None -> - (* Fallback for fields, which are not globals. *) - from_reference name ~location:Location.any - |> Resolution.resolve_expression_to_annotation resolution - in - let parent = Option.value_exn (Reference.prefix name) in + let open ModelVerifier in let get_matching_method ~predicate = + let parent = Option.value_exn (Reference.prefix name) in + let global_resolution = Resolution.global_resolution resolution in let get_matching_define = function | { Node.value = Statement.Define ({ signature; _ } as define); _ } -> if @@ -1625,6 +1615,7 @@ let callable_annotation ~variables signature |> Type.Callable.create_from_implementation + |> (fun callable -> Global.Attribute callable) |> Option.some else None @@ -1635,21 +1626,13 @@ let callable_annotation >>= List.hd >>| (fun definition -> definition.Node.value.Class.body) >>= List.find_map ~f:get_matching_define - >>| Annotation.create - |> function - | Some annotation -> Ok annotation - | None -> - Error - (model_verification_error - ~path - ~location - (ModelVerificationError.T.NotInEnvironment (Reference.show name))) + |> Core.Result.return in if signature_is_property define then get_matching_method ~predicate:is_property else if Define.Signature.is_property_setter define then get_matching_method ~predicate:Define.is_property_setter - else if (not (List.is_empty decorators)) && verify_decorators then + else if verify_decorators && not (List.is_empty decorators) then (* Ensure that models don't declare decorators that our taint analyses doesn't understand. We check for the verify_decorators flag, as defines originating from `create_model_from_annotation` are not user-specified models that we're parsing. *) @@ -1659,7 +1642,7 @@ let callable_annotation ~location (ModelVerificationError.T.UnexpectedDecorators { name; unexpected_decorators = decorators })) else - Ok (global_type ()) + Ok (resolve_global ~resolution name) let adjust_mode_and_skipped_overrides @@ -2194,6 +2177,8 @@ let create_model_from_signature } = let open Core.Result in + let open ModelVerifier in + let call_target = (call_target :> Callable.t) in (* Strip off the decorators only used for taint annotations. *) let top_level_decorators, define = let is_taint_decorator decorator = @@ -2218,49 +2203,32 @@ let create_model_from_signature { location with start } in (* Make sure we know about what we model. *) + let model_verification_error kind = Error { ModelVerificationError.T.kind; path; location } in let callable_annotation = - callable_annotation ~path ~location ~verify_decorators:true ~resolution define - in - let call_target = (call_target :> Callable.t) in - let callable_annotation = - callable_annotation - >>= fun callable_annotation -> - if - Type.is_top (Annotation.annotation callable_annotation) - (* FIXME: We are relying on the fact that nonexistent functions&attributes resolve to mutable - callable annotation, while existing ones resolve to immutable callable annotation. This is - fragile! *) - && not (Annotation.is_immutable callable_annotation) - then - Error - { - ModelVerificationError.T.kind = - ModelVerificationError.T.NotInEnvironment (Reference.show callable_name); - path; - location; - } - else if Type.is_meta (Annotation.annotation callable_annotation) then - Error - { - ModelVerificationError.T.kind = - ModelVerificationError.T.ModelingClassAsDefine (Reference.show callable_name); - path; - location; - } - else - Ok callable_annotation + resolve_global_callable ~path ~location ~verify_decorators:true ~resolution define + >>= function + | None -> + model_verification_error + (ModelVerificationError.T.NotInEnvironment (Reference.show callable_name)) + | Some Global.Class -> + model_verification_error + (ModelVerificationError.T.ModelingClassAsDefine (Reference.show callable_name)) + | Some Global.Module -> + model_verification_error + (ModelVerificationError.T.ModelingModuleAsDefine (Reference.show callable_name)) + | Some (Global.Attribute (Type.Callable t)) + | Some + (Global.Attribute + (Type.Parametric + { name = "BoundMethod"; parameters = [Type.Parameter.Single (Type.Callable t); _] })) -> + Ok (Some t) + | Some (Global.Attribute Type.Any) -> Ok None + | Some (Global.Attribute Type.Top) -> Ok None + | Some (Global.Attribute _) -> + model_verification_error + (ModelVerificationError.T.ModelingAttributeAsDefine (Reference.show callable_name)) in (* Check model matches callables primary signature. *) - let callable_annotation = - callable_annotation - >>| Annotation.annotation - >>| function - | Type.Callable t -> Some t - | Type.Parametric - { name = "BoundMethod"; parameters = [Type.Parameter.Single (Type.Callable t); _] } -> - Some t - | _ -> None - in let callable_parameter_names_to_positions = match callable_annotation with | Ok @@ -2306,7 +2274,7 @@ let create_model_from_signature { name = Reference.show callable_name; callable_type = - Option.value_exn (Caml.Result.value callable_annotation ~default:None); + Option.value_exn (Caml.Result.get_ok callable_annotation); reasons = [UnexpectedPositionalParameter name]; }; path; @@ -2548,6 +2516,7 @@ let create_callable_model_from_annotations annotations = let open Core.Result in + let open ModelVerifier in let global_resolution = Resolution.global_resolution resolution in match Interprocedural.Callable.get_module_and_definition ~resolution:global_resolution callable @@ -2557,22 +2526,23 @@ let create_callable_model_from_annotations (invalid_model_query_error (Format.sprintf "No callable corresponding to `%s` found." (Callable.show callable))) | Some (_, { Node.value = { Define.signature = define; _ }; _ }) -> - let callable_annotation = - callable_annotation - ~path:None - ~location:Location.any - ~resolution - ~verify_decorators:false - define - >>| Annotation.annotation - >>| function - | Type.Callable t -> Some t - | Type.Parametric - { name = "BoundMethod"; parameters = [Type.Parameter.Single (Type.Callable t); _] } -> - Some t - | _ -> None - in - callable_annotation + resolve_global_callable + ~path:None + ~location:Location.any + ~resolution + ~verify_decorators:false + define + >>| (function + | Some (Global.Attribute (Type.Callable t)) + | Some + (Global.Attribute + (Type.Parametric + { + name = "BoundMethod"; + parameters = [Type.Parameter.Single (Type.Callable t); _]; + })) -> + Some t + | _ -> None) >>= fun callable_annotation -> List.fold annotations diff --git a/source/interprocedural_analyses/taint/modelVerificationError.ml b/source/interprocedural_analyses/taint/modelVerificationError.ml index 95a749e35ba..eec09dbb47f 100644 --- a/source/interprocedural_analyses/taint/modelVerificationError.ml +++ b/source/interprocedural_analyses/taint/modelVerificationError.ml @@ -57,6 +57,8 @@ module T = struct attribute_name: string; } | ModelingClassAsDefine of string + | ModelingModuleAsDefine of string + | ModelingAttributeAsDefine of string | NotInEnvironment of string | UnexpectedDecorators of { name: Reference.t; @@ -177,6 +179,13 @@ let description error = "The class `%s` is not a valid define - did you mean to model `%s.__init__()`?" class_name class_name + | ModelingModuleAsDefine module_name -> + Format.sprintf "The module `%s` is not a valid define." module_name + | ModelingAttributeAsDefine attribute_name -> + Format.sprintf + "The attribute `%s` is not a valid define - did you mean to use `%s: ...`?" + attribute_name + attribute_name | NotInEnvironment name -> Format.sprintf "`%s` is not part of the environment!" name @@ -199,6 +208,8 @@ let code { kind; _ } = | InvalidModelQueryClauseArguments _ -> 14 | InvalidIdentifier _ -> 15 | UnexpectedStatement _ -> 16 + | ModelingModuleAsDefine _ -> 17 + | ModelingAttributeAsDefine _ -> 18 let display { kind = error; path; location } = diff --git a/source/interprocedural_analyses/taint/modelVerificationError.mli b/source/interprocedural_analyses/taint/modelVerificationError.mli index 71d9c656a09..2f55dad3ee0 100644 --- a/source/interprocedural_analyses/taint/modelVerificationError.mli +++ b/source/interprocedural_analyses/taint/modelVerificationError.mli @@ -55,6 +55,8 @@ module T : sig attribute_name: string; } | ModelingClassAsDefine of string + | ModelingModuleAsDefine of string + | ModelingAttributeAsDefine of string | NotInEnvironment of string | UnexpectedDecorators of { name: Reference.t; diff --git a/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.models b/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.models index 1ec1281ca0d..45f1239866a 100644 --- a/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.models +++ b/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.models @@ -697,7 +697,8 @@ "leaves": [ { "kind": "Test", - "name": "dataclass_taint.DataClassWithOtherSource.tainted" + "name": + "Obj{dataclass_taint.DataClassWithOtherSource.tainted}" } ] } diff --git a/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.pysa b/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.pysa index e5a78fdd8e7..20f2010f291 100644 --- a/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.pysa +++ b/source/interprocedural_analyses/taint/test/integration/dataclass_taint.py.pysa @@ -1,4 +1,4 @@ def dataclass_taint.WeirdDataClass.__init__(self, bad: TaintSource[UserControlled]): ... dataclass_taint.WeirdDataClass.bad_sink: TaintSink[Test] = ... dataclass_taint.DataClassWithSource.tainted: TaintSource[Test] = ... -def dataclass_taint.DataClassWithOtherSource.tainted() -> TaintSource[Test]: ... +dataclass_taint.DataClassWithOtherSource.tainted: TaintSource[Test] diff --git a/source/interprocedural_analyses/taint/test/modelTest.ml b/source/interprocedural_analyses/taint/test/modelTest.ml index a4b6d2a8de9..72b2adb9bb7 100644 --- a/source/interprocedural_analyses/taint/test/modelTest.ml +++ b/source/interprocedural_analyses/taint/test/modelTest.ml @@ -1957,6 +1957,46 @@ let test_invalid_models context = ~model_source:"def not_in_the_environment.derp(parameter: InvalidTaintDirection[Test]): ..." ~expect:"`not_in_the_environment.derp` is not part of the environment!" (); + assert_invalid_model + ~model_source:"def test(parameter: InvalidTaintDirection[Test]): ..." + ~expect:"The module `test` is not a valid define." + (); + assert_invalid_model + ~source:{| + class Foo: + x: int = 1 + |} + ~model_source:{| + def test.Foo.x(self) -> TaintSource[Test]: ... + |} + ~expect: + "The attribute `test.Foo.x` is not a valid define - did you mean to use `test.Foo.x: ...`?" + (); + (* Accept callable models for Any or Top because of type error. *) + assert_valid_model + ~source: + {| + class Foo: + @unknown_decorator + def bar(self): + pass + |} + ~model_source:{| + def test.Foo.bar() -> TaintSource[A]: ... + |} + (); + assert_valid_model + ~source: + {| + class Foo: + def bar(self): + pass + baz = bar + |} + ~model_source:{| + def test.Foo.baz() -> TaintSource[A]: ... + |} + (); assert_invalid_model ~source: {| diff --git a/stubs/taint/core_privacy_security/django_rest_framework.pysa b/stubs/taint/core_privacy_security/django_rest_framework.pysa index 480650af19f..ec43861c86b 100644 --- a/stubs/taint/core_privacy_security/django_rest_framework.pysa +++ b/stubs/taint/core_privacy_security/django_rest_framework.pysa @@ -1,8 +1,16 @@ +@property def rest_framework.request.Request.POST(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.FILES(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.DATA(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.QUERY_PARAMS(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.data(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.query_params(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.content_type(self) -> TaintSource[UserControlled]: ... +@property def rest_framework.request.Request.stream(self) -> TaintSource[UserControlled]: ... diff --git a/stubs/taint/core_privacy_security/flask_sources_sinks.pysa b/stubs/taint/core_privacy_security/flask_sources_sinks.pysa index 7451cba66bd..81976bd3c94 100644 --- a/stubs/taint/core_privacy_security/flask_sources_sinks.pysa +++ b/stubs/taint/core_privacy_security/flask_sources_sinks.pysa @@ -9,8 +9,11 @@ werkzeug.wrappers.BaseRequest.base_url: TaintSource[UserControlled, UserControll def werkzeug.wrappers.BaseRequest.cookies(self) -> TaintSource[UserControlled, UserControlled_Meta, Cookies]: ... werkzeug.wrappers.BaseRequest.method: TaintSource[UserControlled, UserControlled_Meta] = ... werkzeug.wrappers.BaseRequest.headers: TaintSource[UserControlled, UserControlled_Meta, HeaderData] = ... +@property def werkzeug.wrappers.CommonRequestDescriptorsMixin.content_type(self) -> TaintSource[UserControlled, UserControlled_Meta]: ... +@property def werkzeug.wrappers.CommonRequestDescriptorsMixin.referrer(self) -> TaintSource[UserControlled, UserControlled_Meta, HeaderData]: ... +@property def werkzeug.wrappers.UserAgentMixin.user_agent(self) -> TaintSource[UserControlled, UserControlled_Meta]: ... def werkzeug.datastructures.Headers.get(self, key, type) -> TaintSource[UserControlled, UserControlled_Meta, HeaderData]: ...