diff --git a/lib/galaxy/exceptions/__init__.py b/lib/galaxy/exceptions/__init__.py
index ea708600dba1..0c270de3ee02 100644
--- a/lib/galaxy/exceptions/__init__.py
+++ b/lib/galaxy/exceptions/__init__.py
@@ -70,6 +70,11 @@ class MalformedId( MessageException ):
     err_code = error_codes.MALFORMED_ID
 
 
+class MalformedContents( MessageException ):
+    status_code = 400
+    err_code = error_codes.MALFORMED_CONTENTS
+
+
 class UnknownContentsType( MessageException ):
     status_code = 400
     err_code = error_codes.UNKNOWN_CONTENTS_TYPE
diff --git a/lib/galaxy/exceptions/error_codes.json b/lib/galaxy/exceptions/error_codes.json
index c681afdebb02..50faf0fc6444 100644
--- a/lib/galaxy/exceptions/error_codes.json
+++ b/lib/galaxy/exceptions/error_codes.json
@@ -61,9 +61,14 @@
     },
     {
         "name": "USER_TOOL_META_PARAMETER_PROBLEM",
-        "code": 400011,
+        "code": 400012,
         "message": "Supplied incorrect or incompatible tool meta parameters."
     },
+    {
+        "name": "MALFORMED_CONTENTS",
+        "code": 400013,
+        "message": "The contents of the request are malformed."
+    },
     {
         "name": "USER_AUTHENTICATION_FAILED",
         "code": 401001,
diff --git a/lib/galaxy/tools/imp_exp/__init__.py b/lib/galaxy/tools/imp_exp/__init__.py
index fea5325f39ac..55349a4149bb 100644
--- a/lib/galaxy/tools/imp_exp/__init__.py
+++ b/lib/galaxy/tools/imp_exp/__init__.py
@@ -10,6 +10,7 @@
 from sqlalchemy.orm import eagerload, eagerload_all
 
 from galaxy import model
+from galaxy.exceptions import MalformedContents
 from galaxy.model.item_attrs import UsesAnnotations
 from galaxy.tools.parameters.basic import UnvalidatedValue
 from galaxy.util.json import dumps, loads
@@ -170,9 +171,9 @@ def get_tag_str( tag, value ):
             if dataset_attrs.get('exported', True) is True:
                 # Do security check and move/copy dataset data.
                 temp_dataset_file_name = \
-                    os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
+                    os.path.realpath( os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) ) )
                 if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
-                    raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
+                    raise MalformedContents( "Invalid dataset path: %s" % temp_dataset_file_name )
                 if datasets_usage_counts[ temp_dataset_file_name ] == 1:
                     self.app.object_store.update_from_file( hda.dataset, file_name=temp_dataset_file_name, create=True )
@@ -309,6 +310,7 @@ def default( self, obj ):
         except Exception, e:
             jiha.job.stderr += "Error cleaning up history import job: %s" % e
             self.sa_session.flush()
+            raise
 
 
 class JobExportHistoryArchiveWrapper( object, UsesAnnotations ):
diff --git a/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py b/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
index d6890257c8c0..f3dab0c5ae40 100644
--- a/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
+++ b/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
@@ -6,6 +6,7 @@
 --[url|file] source type, either a URL or a file.
 """
 
+import os
 import sys
 import optparse
 import tarfile
@@ -43,6 +44,22 @@ def url_to_file( url, dest_file ):
     return None
 
 
+def check_archive( archive_file, dest_dir ):
+    """
+    Ensure that a tar archive has no absolute paths or relative paths outside
+    the archive.
+    """
+    with tarfile.open( archive_file, mode='r:gz' ) as archive_fp:
+        for arc_path in archive_fp.getnames():
+            assert os.path.normpath(
+                os.path.join(
+                    dest_dir,
+                    arc_path
+                ) ).startswith( dest_dir.rstrip( os.sep ) + os.sep ), \
+                "Archive member would extract outside target directory: %s" % arc_path
+    return True
+
+
 def unpack_archive( archive_file, dest_dir ):
     """
     Unpack a tar and/or gzipped archive into a destination directory.
@@ -51,13 +68,8 @@ def unpack_archive( archive_file, dest_dir ):
     archive_fp.extractall( path=dest_dir )
     archive_fp.close()
 
-if __name__ == "__main__":
-    # Parse command line.
-    parser = optparse.OptionParser()
-    parser.add_option( '-U', '--url', dest='is_url', action="store_true", help='Source is a URL.' )
-    parser.add_option( '-F', '--file', dest='is_file', action="store_true", help='Source is a URL.' )
-    parser.add_option( '-e', '--encoded', dest='is_b64encoded', action="store_true", default=False, help='Source and destination dir values are base64 encoded.' )
-    (options, args) = parser.parse_args()
+
+def main(options, args):
     is_url = bool( options.is_url )
     is_file = bool( options.is_file )
     archive_source, dest_dir = args
@@ -66,14 +78,25 @@ def unpack_archive( archive_file, dest_dir ):
         archive_source = b64decode( archive_source )
         dest_dir = b64decode( dest_dir )
 
+    # Get archive from URL.
+    if is_url:
+        archive_file = url_to_file( archive_source, tempfile.NamedTemporaryFile( dir=dest_dir ).name )
+    elif is_file:
+        archive_file = archive_source
+
+    # Unpack archive.
+    check_archive( archive_file, dest_dir )
+    unpack_archive( archive_file, dest_dir )
+
+
+if __name__ == "__main__":
+    # Parse command line.
+    parser = optparse.OptionParser()
+    parser.add_option( '-U', '--url', dest='is_url', action="store_true", help='Source is a URL.' )
+    parser.add_option( '-F', '--file', dest='is_file', action="store_true", help='Source is a file.' )
+    parser.add_option( '-e', '--encoded', dest='is_b64encoded', action="store_true", default=False, help='Source and destination dir values are base64 encoded.' )
+    (options, args) = parser.parse_args()
     try:
-        # Get archive from URL.
-        if is_url:
-            archive_file = url_to_file( archive_source, tempfile.NamedTemporaryFile( dir=dest_dir ).name )
-        elif is_file:
-            archive_file = archive_source
-
-        # Unpack archive.
-        unpack_archive( archive_file, dest_dir )
+        main(options, args)
     except Exception, e:
         print "Error unpacking tar/gz archive: %s" % e, sys.stderr