Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: majorAnalysis
---
* Added a couple of sources, as `tarfile.TarFile(), MKtarfile.Tarfile.open(), contextlib.closing(XXXX)`.
* Added a couple of sinks, as `_extract_member()`, and more from `extractall()`.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.Attributes

/**
* Provides default sources, sinks and sanitizers for detecting
Expand All @@ -31,12 +32,41 @@ module TarSlip {
*/
abstract class Sanitizer extends DataFlow::Node { }

/**
* Handle those three cases of Tarfile opens:
* - `tarfile.open()`
* - `tarfile.TarFile()`
* - `MKtarfile.Tarfile.open()`
*/
API::Node tarfileOpen() {
result in [
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
]
}

/**
* Handle the previous three cases, plus the use of `closing` in the previous cases
*/
class AllTarfileOpens extends API::CallNode {
AllTarfileOpens() {
exists(TarfileOpens tfo | this = tfo.getACall())
or
exists(API::Node closing, Node arg, TarfileOpens tfo |
closing = API::moduleImport("contextlib").getMember("closing") and
this = closing.getACall() and
arg = this.getArg(0) and
arg = tfo.getACall()
)
}
}
Comment on lines +51 to +62
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The extra source added here is better handled by an additional taint step. Then paths will start at the open call rather than at the closing call, and, if the two a separated in the code, we would find the data flow to connect them.


/**
* A call to `tarfile.open`, considered as a flow source.
*/
class TarfileOpen extends Source {
TarfileOpen() {
this = API::moduleImport("tarfile").getMember("open").getACall() and
this = tarfileOpen().getACall()
// If argument refers to a string object, then it's a hardcoded path and
// this tarfile is safe.
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StrConst and
Expand All @@ -58,20 +88,38 @@ module TarSlip {
/**
* A sink capturing method calls to `extractall`.
*
* For a call to `file.extractall` without arguments, `file` is considered a sink.
* For a call to `file.extractall` without `members` argument, `file` is considered a sink.
*/
class ExtractAllSink extends Sink {
ExtractAllSink() {
exists(DataFlow::CallCfgNode call |
call =
API::moduleImport("tarfile")
.getMember("open")
.getReturn()
.getMember("extractall")
.getACall() and
not exists(call.getArg(_)) and
not exists(call.getArgByName(_)) and
this = call.(DataFlow::MethodCallNode).getObject()
class ExtractAllWithoutMembersSink extends Sink {
ExtractAllWithoutMembersSink() {
exists(AllTarfileOpens atfo, MethodCallNode call |
call = atfo.getReturn().getMember("extractall").getACall() and
not exists(Node arg | arg = call.getArgByName("members")) and
this = call.getObject()
)
}
}

/**
* A sink capturing method calls to `extractall` with `members` arguments.
*
* For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
* a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
* Otherwise, the argument of `members` is considered a sink.
*/
class ExtractAllwithMembersSink extends Sink {
ExtractAllwithMembersSink() {
exists(AllTarfileOpens atfo, MethodCallNode call, Node arg |
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
then this = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"
then this = arg.(MethodCallNode).getObject()
else this = call.getArgByName("members")
)
}
}
Expand All @@ -81,10 +129,18 @@ module TarSlip {
*/
class ExtractSink extends Sink {
ExtractSink() {
exists(DataFlow::CallCfgNode call |
call =
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
this = call.getArg(0)
this = tarfileOpen().getReturn().getMember("extract").getACall().getArg(0)
}
}

/**
* An argument to `_extract_member` is considered a sink.
*/
class ExtractMemberSink extends Sink {
ExtractMemberSink() {
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("_extract_member").getACall() and
call.getArg(1).(AttrRead).accesses(this, "name")
)
}
}
Expand Down
60 changes: 60 additions & 0 deletions python/ql/src/experimental/Security/CWE-022bis/TarSlip.qhelp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>Extracting files from a malicious tarball without validating that the destination file path
is within the destination directory can cause files outside the destination directory to be
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
archive path names.</p>

<p>Tarball contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>

<p>For example, if a tarball contains a file entry <code>../sneaky-file</code>, and the tarball
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
in an output file path of <code>/tmp/tmp123/../sneaky-file</code>, which would cause the file to be
written to <code>/tmp/</code>.</p>

</overview>
<recommendation>

<p>Ensure that output paths constructed from tarball entries are validated
to prevent writing files to unexpected locations.</p>

<p>The recommended way of writing an output file from a tarball entry is to call <code>extract()</code> or <code>extractall()</code>.
</p>

</recommendation>

<example>
<p>
In this example an archive is extracted without validating file paths.
</p>

<sample src="examples/TarSlip_1.py" />

<p>To fix this vulnerability, we need to call the function <code>extractall()</code>.
</p>

<sample src="examples/NoHIT_TarSlip_1.py" />

</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>

<li>
Tarfile documentation
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">extractall() warning</a>
</li>
</references>
</qhelp>
24 changes: 24 additions & 0 deletions python/ql/src/experimental/Security/CWE-022bis/TarSlip.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* @name Arbitrary file write during archive extraction ("Tar Slip")
* @description Extracting files from a malicious archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-022
*/

import python
import semmle.python.security.dataflow.TarSlipCustomizations
import semmle.python.security.dataflow.TarSlipQuery
import DataFlow::PathGraph

from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select source.getNode(), source, sink,
"This unsanitized archive entry, which may contain '..', is used in a $@.", sink.getNode(),
"file system operation"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import sys
import tarfile

def managed_members_archive_handler(filename):
tar = tarfile.open(filename)
result = []
for member in tar:
if ".." in member.name:
raise ValueError("Path in member name !!!")
result.append(member)
path = sys.argv[2]
#print("files are extracted to: ", path)
tar.extractall(path=path, members=result)
tar.close()

if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
managed_members_archive_handler(filename)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import sys
import tarfile
import tempfile

def managed_members_archive_handler(filename):
tar = tarfile.open(filename)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
tar.close()


def members_filter(tarfile):
result = []
for member in tarfile:
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif member.issym() or member.islnk():
print('Symlink to external resource')
continue
result.append(member)
return result


if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
managed_members_archive_handler(filename)
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import tarfile
import sys

with tarfile.open(sys.argv[1]) as tar:
for entry in tar:
if ".." in entry.name:
raise ValueError("Illegal tar archive entry")
tar.extract(entry, "/tmp/unpack/")
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import tarfile
import sys
import os

def _validate_archive_name(name, target):
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
raise ValueError(f"Provided language pack contains invalid name {name}")

with tarfile.open(sys.argv[1]) as tar:
target = "/tmp/unpack"
for entry in tar:
_validate_archive_name(entry.name, target)
tar.extract(entry, target)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# https://github.com/PyCQA/bandit

import sys
import tarfile
import tempfile

def managed_members_archive_handler(filename):
tar = tarfile.open(filename)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
tar.close()


def members_filter(tarfile):
result = []
for member in tarfile.getmembers():
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif (member.issym() or member.islnk()) and ('../' in member.linkname):
print('Symlink to external resource')
continue
result.append(member)
return result


if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
managed_members_archive_handler(filename)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# https://github.com/OctoPrint/OctoPrint/

import sys
import tarfile
import os

def _validate_tar_info(info, target):
_validate_archive_name(info.name, target)
if not (info.isfile() or info.isdir()):
raise ValueError("Provided language pack contains invalid file type")

def _validate_archive_name(name, target):
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
raise ValueError(f"Provided language pack contains invalid name {name}")

target = "/tmp/unpack"

with tarfile.open(sys.argv[1], "r") as tar:

# sanity check
for info in tar.getmembers():
_validate_tar_info(info, target)

# unpack everything
tar.extractall(target)

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# https://github.com/PyCQA/bandit

import sys
import tarfile
import tempfile

def managed_members_archive_handler(filename):
tar = tarfile.open(filename)
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
tar.close()


def members_filter(tarfile):
result = []
for member in tarfile.getmembers():
if '../' in member.name:
print('Member name container directory traversal sequence')
continue
elif member.issym() or member.islnk():
print('Symlink to external resource')
continue
result.append(member)
return result


if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
managed_members_archive_handler(filename)
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# https://github.com/PyCQA/bandit

import sys
import tarfile
import tempfile

def managed_members_archive_handler(filename):
tar = tarfile.open(filename)
tarf = tar.getmembers()
for f in tarf:
if not f.issym():
tar.extractall(path=tempfile.mkdtemp(), members=[f])
tar.close()

if __name__ == "__main__":
if len(sys.argv) > 1:
filename = sys.argv[1]
managed_members_archive_handler(filename)
Loading