From 964055c107650b36d7cb829ac0a3237320ccc0f5 Mon Sep 17 00:00:00 2001 From: Mathew Payne Date: Mon, 14 Mar 2022 19:21:05 +0000 Subject: [PATCH 1/5] Initial XXE query --- python/CWE-611/XXE-insecure-parser.ql | 24 ++++++++++++++ python/CWE-611/XXE.ql | 34 ++++++++++++++++++++ python/github/XXE.qll | 45 +++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 python/CWE-611/XXE-insecure-parser.ql create mode 100644 python/CWE-611/XXE.ql create mode 100644 python/github/XXE.qll diff --git a/python/CWE-611/XXE-insecure-parser.ql b/python/CWE-611/XXE-insecure-parser.ql new file mode 100644 index 0000000000..feaf69f1ae --- /dev/null +++ b/python/CWE-611/XXE-insecure-parser.ql @@ -0,0 +1,24 @@ +/** + * @name XXE using Insecure XML Parser + * @description XML Parser using insecure feature + * @kind problem + * @problem.severity error + * @security-severity 8.0 + * @sub-severity high + * @precision medium + * @id py/xxe-insecure-parser + * @tags security + * external/cwe/cwe-611 + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.TaintTracking +import semmle.python.Concepts +import semmle.python.ApiGraphs +import github.XXE + +from API::Node parsers +where parsers = XXE::getPyXMLParser() +select parsers, "XML Parser using insecure feature" diff --git a/python/CWE-611/XXE.ql b/python/CWE-611/XXE.ql new file mode 100644 index 0000000000..b382cc42a5 --- /dev/null +++ b/python/CWE-611/XXE.ql @@ -0,0 +1,34 @@ +/** + * @name XXE + * @description XXE + * @kind problem + * @problem.severity error + * @security-severity 8.0 + * @sub-severity high + * @precision high + * @id py/xxe + * @tags security + * external/cwe/cwe-611 + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.TaintTracking +import semmle.python.Concepts +import semmle.python.ApiGraphs +import github.XXE + 
+class XXEConfiguration extends TaintTracking::Configuration { + XXEConfiguration() { this = "XXE" } + + override predicate isSource(DataFlow::Node source) { source instanceof XXE::Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof XXE::Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof XXE::Sanitizer } +} + +from XXEConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "XXE depends on $@.", source.getNode(), "a user-provided value" diff --git a/python/github/XXE.qll b/python/github/XXE.qll new file mode 100644 index 0000000000..50d3e349e1 --- /dev/null +++ b/python/github/XXE.qll @@ -0,0 +1,45 @@ +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.TaintTracking +import semmle.python.Concepts +import semmle.python.ApiGraphs + +module XXE { + abstract class Source extends DataFlow::Node { } + + abstract class Sink extends DataFlow::Node { } + + abstract class Sanitizer extends DataFlow::Node { } + + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + API::Node getPyXMLParser() { + // Find Insecure parsers + exists(API::Node nodes, DataFlow::CallCfgNode feature | + // > from xml.sax import make_parser + // > parser = make_parser() + nodes = API::moduleImport("xml.sax").getMember("make_parser") and + // Make sure that the feature is enabled + // > from xml.sax.handler import feature_external_ges + // > parser.setFeature(feature_external_ges, True) + feature = nodes.getReturn().getMember("setFeature").getACall() and + feature.getArg(0) = + API::moduleImport("xml.sax.handler").getMember("feature_external_ges").getAUse() and + feature.getArg(1).asExpr().(BooleanLiteral).booleanValue() = true and + result = nodes.getReturn() + ) + } + + class PyXML extends Sink { + PyXML() { + exists(DataFlow::CallCfgNode 
call | + // > from xml.dom.pulldom import parseString + // > parseString(request.body.decode('utf-8'), parser=parser) + call = API::moduleImport("xml.dom.pulldom").getMember("parseString").getACall() and + call.getArgByName("parser") = getPyXMLParser().getAUse() and + this = call.getArg(0) + ) + } + } +} From e497f5c8ca38508702ea106ad24ed44f315ed7dd Mon Sep 17 00:00:00 2001 From: Mathew Payne Date: Mon, 14 Mar 2022 19:34:25 +0000 Subject: [PATCH 2/5] Update to path-problem --- python/CWE-611/XXE.ql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/CWE-611/XXE.ql b/python/CWE-611/XXE.ql index b382cc42a5..9dab42c9a9 100644 --- a/python/CWE-611/XXE.ql +++ b/python/CWE-611/XXE.ql @@ -1,7 +1,7 @@ /** * @name XXE * @description XXE - * @kind problem + * @kind path-problem * @problem.severity error * @security-severity 8.0 * @sub-severity high @@ -17,6 +17,7 @@ import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.dataflow.new.TaintTracking import semmle.python.Concepts import semmle.python.ApiGraphs +import DataFlow::PathGraph import github.XXE class XXEConfiguration extends TaintTracking::Configuration { From f8c7beca212049ab5303f506fb68ae4857cb8954 Mon Sep 17 00:00:00 2001 From: Mathew Payne Date: Mon, 14 Mar 2022 19:36:10 +0000 Subject: [PATCH 3/5] Update metadata --- python/CWE-611/XXE-insecure-parser.ql | 2 +- python/CWE-611/XXE.ql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/CWE-611/XXE-insecure-parser.ql b/python/CWE-611/XXE-insecure-parser.ql index feaf69f1ae..3d2fbbd81c 100644 --- a/python/CWE-611/XXE-insecure-parser.ql +++ b/python/CWE-611/XXE-insecure-parser.ql @@ -1,5 +1,5 @@ /** - * @name XXE using Insecure XML Parser + * @name XML External Entity (XXE) using Insecure XML Parser * @description XML Parser using insecure feature * @kind problem * @problem.severity error diff --git a/python/CWE-611/XXE.ql b/python/CWE-611/XXE.ql index 9dab42c9a9..dfe4a13d83 100644 --- 
a/python/CWE-611/XXE.ql +++ b/python/CWE-611/XXE.ql @@ -1,6 +1,6 @@ /** - * @name XXE - * @description XXE + * @name XML External Entity (XXE) + * @description XXE using file / string from remote sources * @kind path-problem * @problem.severity error * @security-severity 8.0 From caa226911e30cfffc16141a5b4d0b7a82d4b7b2f Mon Sep 17 00:00:00 2001 From: Mathew Payne Date: Mon, 14 Mar 2022 19:54:14 +0000 Subject: [PATCH 4/5] Add better parser support --- python/github/XXE.qll | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/python/github/XXE.qll b/python/github/XXE.qll index 50d3e349e1..7de4bc78e4 100644 --- a/python/github/XXE.qll +++ b/python/github/XXE.qll @@ -31,12 +31,26 @@ module XXE { ) } - class PyXML extends Sink { - PyXML() { + class PyXMLSax extends Sink { + PyXMLSax() { exists(DataFlow::CallCfgNode call | + // https://github.com/python/cpython/blob/main/Lib/xml/sax/__init__.py#L70 + // > from xml.sax import make_parser + // > parser = make_parser() + // > parser.parse("xxe.xml") + call = getPyXMLParser().getMember("parse").getACall() and + this = call.getArg(0) + ) + } + } + + class PyXMLDom extends Sink { + PyXMLDom() { + exists(DataFlow::CallCfgNode call | + // https://github.com/python/cpython/blob/3.10/Lib/xml/dom/pulldom.py#L331-L349 // > from xml.dom.pulldom import parseString // > parseString(request.body.decode('utf-8'), parser=parser) - call = API::moduleImport("xml.dom.pulldom").getMember("parseString").getACall() and + call = API::moduleImport("xml.dom.pulldom").getMember(["parseString", "parse"]).getACall() and call.getArgByName("parser") = getPyXMLParser().getAUse() and this = call.getArg(0) ) From b11bf5adac619ebe4f9ec181e4062a65e12be65b Mon Sep 17 00:00:00 2001 From: Mathew Payne Date: Wed, 16 Mar 2022 10:59:36 +0000 Subject: [PATCH 5/5] Remove imports --- python/CWE-611/XXE-insecure-parser.ql | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/CWE-611/XXE-insecure-parser.ql 
b/python/CWE-611/XXE-insecure-parser.ql index 3d2fbbd81c..d6658243d5 100644 --- a/python/CWE-611/XXE-insecure-parser.ql +++ b/python/CWE-611/XXE-insecure-parser.ql @@ -12,9 +12,6 @@ */ import python -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.RemoteFlowSources -import semmle.python.dataflow.new.TaintTracking import semmle.python.Concepts import semmle.python.ApiGraphs import github.XXE