diff --git a/python/CWE-611/XXE-insecure-parser.ql b/python/CWE-611/XXE-insecure-parser.ql
new file mode 100644
index 0000000000..d6658243d5
--- /dev/null
+++ b/python/CWE-611/XXE-insecure-parser.ql
@@ -0,0 +1,23 @@
+/**
+ * @name XML External Entity (XXE) using Insecure XML Parser
+ * @description XML Parser using insecure feature
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 8.0
+ * @sub-severity high
+ * @precision medium
+ * @id py/xxe-insecure-parser
+ * @tags security
+ *       external/cwe/cwe-611
+ */
+
+import python
+import semmle.python.Concepts
+import semmle.python.ApiGraphs
+import github.XXE
+
+// Report every parser object that `XXE::getPyXMLParser()` identifies as having
+// external general entities enabled, whether or not tainted data reaches it.
+from API::Node parsers
+where parsers = XXE::getPyXMLParser()
+select parsers, "XML Parser using insecure feature"
diff --git a/python/CWE-611/XXE.ql b/python/CWE-611/XXE.ql
new file mode 100644
index 0000000000..dfe4a13d83
--- /dev/null
+++ b/python/CWE-611/XXE.ql
@@ -0,0 +1,39 @@
+/**
+ * @name XML External Entity (XXE)
+ * @description XXE using file / string from remote sources
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 8.0
+ * @sub-severity high
+ * @precision high
+ * @id py/xxe
+ * @tags security
+ *       external/cwe/cwe-611
+ */
+
+import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.RemoteFlowSources
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.Concepts
+import semmle.python.ApiGraphs
+import DataFlow::PathGraph
+import github.XXE
+
+/**
+ * A taint-tracking configuration for data that flows from an `XXE::Source`
+ * to an `XXE::Sink` without passing through an `XXE::Sanitizer`.
+ */
+class XXEConfiguration extends TaintTracking::Configuration {
+  XXEConfiguration() { this = "XXE" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof XXE::Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof XXE::Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) { node instanceof XXE::Sanitizer }
+}
+
+from XXEConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "XXE depends on $@.", source.getNode(), "a user-provided value"
diff --git a/python/github/XXE.qll b/python/github/XXE.qll
new file mode 100644
index 0000000000..7de4bc78e4
--- /dev/null
+++ b/python/github/XXE.qll
@@ -0,0 +1,86 @@
+/**
+ * Provides sources, sinks and sanitizers for detecting XML External Entity
+ * (XXE) vulnerabilities in Python code that uses `xml.sax` parsers.
+ */
+
+import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.RemoteFlowSources
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.Concepts
+import semmle.python.ApiGraphs
+
+/** Classes and predicates for modelling XXE data flow. */
+module XXE {
+  /** A data-flow node that XXE queries treat as a taint source. */
+  abstract class Source extends DataFlow::Node { }
+
+  /** A data-flow node that XXE queries treat as a taint sink. */
+  abstract class Sink extends DataFlow::Node { }
+
+  /** A data-flow node that XXE queries treat as a sanitizer. */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /** A remote flow source, considered as a source for XXE. */
+  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+
+  /**
+   * Gets an API node for the result of `xml.sax.make_parser()` on which
+   * `setFeature(feature_external_ges, True)` is called, that is, a SAX parser
+   * with external general entities enabled.
+   *
+   * NOTE(review): `getReturn()` does not appear to be tied to one particular
+   * `make_parser()` call, so this may also match parsers that never had the
+   * feature enabled when another parser in the same database did - confirm
+   * whether a per-call model is needed.
+   */
+  API::Node getPyXMLParser() {
+    exists(API::Node makeParser, DataFlow::CallCfgNode setFeatureCall |
+      // > from xml.sax import make_parser
+      // > parser = make_parser()
+      makeParser = API::moduleImport("xml.sax").getMember("make_parser") and
+      result = makeParser.getReturn() and
+      // Make sure that the feature is enabled
+      // > from xml.sax.handler import feature_external_ges
+      // > parser.setFeature(feature_external_ges, True)
+      setFeatureCall = result.getMember("setFeature").getACall() and
+      setFeatureCall.getArg(0) =
+        API::moduleImport("xml.sax.handler").getMember("feature_external_ges").getAUse() and
+      setFeatureCall.getArg(1).asExpr().(BooleanLiteral).booleanValue() = true
+    )
+  }
+
+  /**
+   * The first argument of `parse` called on an insecure SAX parser, as in
+   * `parser.parse("xxe.xml")`.
+   */
+  class PyXMLSax extends Sink {
+    PyXMLSax() {
+      // https://github.com/python/cpython/blob/main/Lib/xml/sax/__init__.py#L70
+      // > from xml.sax import make_parser
+      // > parser = make_parser()
+      // > parser.parse("xxe.xml")
+      this = getPyXMLParser().getMember("parse").getACall().getArg(0)
+    }
+  }
+
+  /**
+   * The first argument of `xml.dom.pulldom.parse` or
+   * `xml.dom.pulldom.parseString` when an insecure SAX parser is supplied as
+   * the `parser` argument, either positionally or by keyword.
+   */
+  class PyXMLDom extends Sink {
+    PyXMLDom() {
+      exists(DataFlow::CallCfgNode call |
+        // https://github.com/python/cpython/blob/3.10/Lib/xml/dom/pulldom.py#L331-L349
+        // > from xml.dom.pulldom import parseString
+        // > parseString(request.body.decode('utf-8'), parser=parser)
+        call = API::moduleImport("xml.dom.pulldom").getMember(["parseString", "parse"]).getACall() and
+        // `parser` is the second parameter of both functions, so it can also
+        // be passed positionally: parseString(data, parser)
+        [call.getArg(1), call.getArgByName("parser")] = getPyXMLParser().getAUse() and
+        this = call.getArg(0)
+      )
+    }
+  }
+}