Skip to content

Commit

Permalink
[spark] Fixes python tarslip security concern
Browse files Browse the repository at this point in the history
  • Loading branch information
frankfliu committed Feb 15, 2024
1 parent 64c1b96 commit 5235be5
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions extensions/spark/setup/djl_spark/util/files_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ def download_and_extract(url, path):
:param url: The url of the tar file.
:param path: The directory to extract the downloaded tar file into (created if it does not exist).
"""
def is_within_directory(directory, target):
    """Return True if ``target`` is ``directory`` itself or lies beneath it.

    Both paths are made absolute and normalised (``..`` segments collapsed)
    before they are compared; symbolic links are not resolved.

    :param directory: The directory that is expected to contain ``target``.
    :param target: The path to check.
    :return: True when ``target`` is inside ``directory``, False otherwise.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        # commonpath compares whole path components. commonprefix works on
        # characters, so it would accept "/tmp/foobar" as being inside
        # "/tmp/foo".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share an ancestor (for example,
        # different drives on Windows), so the target is not inside.
        return False
    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract a tar archive after checking that nothing escapes ``path``.

    Every member that is about to be extracted is validated first: its own
    name, and for symbolic or hard links also the link target, must resolve
    to a location inside ``path``. Nothing is written to disk unless all
    members pass.

    :param tar: An open ``tarfile.TarFile`` object.
    :param path: The directory to extract into.
    :param members: Optional iterable of ``TarInfo`` objects to extract;
        all members of the archive are extracted when omitted.
    :param numeric_owner: Passed through to ``TarFile.extractall``.
    :raises Exception: If a member name or link target points outside
        ``path``.
    """
    # Materialise the selection once so that exactly the members being
    # extracted are validated, and a generator is not exhausted by the check.
    to_extract = tar.getmembers() if members is None else list(members)

    for member in to_extract:
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        if member.issym():
            # Symlink targets are relative to the directory holding the link.
            link_target = os.path.join(os.path.dirname(member_path),
                                       member.linkname)
        elif member.islnk():
            # Hard link targets are relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        # A link pointing outside ``path`` would let a later member be
        # written through it, bypassing the name check above.
        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, to_extract, numeric_owner=numeric_owner)

if not os.path.exists(path):
os.makedirs(path)
if not os.listdir(path):
Expand All @@ -78,9 +92,9 @@ def download_and_extract(url, path):
if url.startswith("s3://"):
s3_download(url, tmp_file)
with tarfile.open(name=tmp_file, mode="r:gz") as t:
t.extractall(path=path)
safe_extract(t, path=path)
elif url.startswith("http://") or url.startswith("https://"):
with urlopen(url) as response, open(tmp_file, 'wb') as f:
shutil.copyfileobj(response, f)
with tarfile.open(name=tmp_file, mode="r:gz") as t:
t.extractall(path=path)
safe_extract(t, path=path)

0 comments on commit 5235be5

Please sign in to comment.