Skip to content

Commit

Permalink
Security fixes for history imports
Browse files Browse the repository at this point in the history
  • Loading branch information
natefoo committed Feb 24, 2016
1 parent b3e0315 commit 8736c7b
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 18 deletions.
5 changes: 5 additions & 0 deletions lib/galaxy/exceptions/__init__.py
Expand Up @@ -70,6 +70,11 @@ class MalformedId( MessageException ):
err_code = error_codes.MALFORMED_ID


class MalformedContents( MessageException ):
    """400-level API error: the contents of the request are malformed.

    Maps to the MALFORMED_CONTENTS entry (code 400013) in error_codes.json;
    raised e.g. by history import when archive contents fail validation.
    """
    status_code = 400
    err_code = error_codes.MALFORMED_CONTENTS


class UnknownContentsType( MessageException ):
    """400-level API error: the request supplied an unrecognized contents type.

    Maps to the UNKNOWN_CONTENTS_TYPE entry in error_codes.json.
    """
    status_code = 400
    err_code = error_codes.UNKNOWN_CONTENTS_TYPE
Expand Down
7 changes: 6 additions & 1 deletion lib/galaxy/exceptions/error_codes.json
Expand Up @@ -61,9 +61,14 @@
},
{
"name": "USER_TOOL_META_PARAMETER_PROBLEM",
"code": 400011,
"code": 400012,
"message": "Supplied incorrect or incompatible tool meta parameters."
},
{
"name": "MALFORMED_CONTENTS",
"code": 400013,
"message": "The contents of the request are malformed."
},
{
"name": "USER_AUTHENTICATION_FAILED",
"code": 401001,
Expand Down
6 changes: 4 additions & 2 deletions lib/galaxy/tools/imp_exp/__init__.py
Expand Up @@ -10,6 +10,7 @@
from sqlalchemy.orm import eagerload, eagerload_all

from galaxy import model
from galaxy.exceptions import MalformedContents
from galaxy.model.item_attrs import UsesAnnotations
from galaxy.tools.parameters.basic import UnvalidatedValue
from galaxy.util.json import dumps, loads
Expand Down Expand Up @@ -170,9 +171,9 @@ def get_tag_str( tag, value ):
if dataset_attrs.get('exported', True) is True:
# Do security check and move/copy dataset data.
temp_dataset_file_name = \
os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) )
os.path.realpath( os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) ) )
if not file_in_dir( temp_dataset_file_name, os.path.join( archive_dir, "datasets" ) ):
raise Exception( "Invalid dataset path: %s" % temp_dataset_file_name )
raise MalformedContents( "Invalid dataset path: %s" % temp_dataset_file_name )
if datasets_usage_counts[ temp_dataset_file_name ] == 1:
self.app.object_store.update_from_file( hda.dataset, file_name=temp_dataset_file_name, create=True )

Expand Down Expand Up @@ -309,6 +310,7 @@ def default( self, obj ):
except Exception, e:
jiha.job.stderr += "Error cleaning up history import job: %s" % e
self.sa_session.flush()
raise


class JobExportHistoryArchiveWrapper( object, UsesAnnotations ):
Expand Down
53 changes: 38 additions & 15 deletions lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
Expand Up @@ -6,6 +6,7 @@
--[url|file] source type, either a URL or a file.
"""

import os
import sys
import optparse
import tarfile
Expand Down Expand Up @@ -43,6 +44,22 @@ def url_to_file( url, dest_file ):
return None


def check_archive( archive_file, dest_dir ):
    """
    Ensure that a tar archive has no absolute paths or relative paths outside
    the archive (i.e. no member can extract outside ``dest_dir``).

    :param archive_file: path to a gzip-compressed tar archive.
    :param dest_dir: directory the archive is intended to extract into.
    :returns: True when every member is safe.
    :raises Exception: if any member would extract outside ``dest_dir``.
    """
    # Normalize the target prefix once, outside the loop, so the comparison
    # below cannot be fooled by a sibling directory with a shared name prefix
    # (e.g. '/dest' vs '/dest-evil').
    safe_prefix = dest_dir.rstrip( os.sep ) + os.sep
    with tarfile.open( archive_file, mode='r:gz' ) as archive_fp:
        for arc_path in archive_fp.getnames():
            # Resolve the member path relative to the destination; absolute
            # member names or '..' components escape the prefix and are
            # rejected.
            resolved = os.path.normpath( os.path.join( dest_dir, arc_path ) )
            # Do NOT use `assert` here: asserts are stripped when Python runs
            # with -O, which would silently disable this security check.
            if not resolved.startswith( safe_prefix ):
                raise Exception( "Archive member would extract outside target directory: %s" % arc_path )
    return True


def unpack_archive( archive_file, dest_dir ):
"""
Unpack a tar and/or gzipped archive into a destination directory.
Expand All @@ -51,13 +68,8 @@ def unpack_archive( archive_file, dest_dir ):
archive_fp.extractall( path=dest_dir )
archive_fp.close()

if __name__ == "__main__":
# Parse command line.
parser = optparse.OptionParser()
parser.add_option( '-U', '--url', dest='is_url', action="store_true", help='Source is a URL.' )
parser.add_option( '-F', '--file', dest='is_file', action="store_true", help='Source is a URL.' )
parser.add_option( '-e', '--encoded', dest='is_b64encoded', action="store_true", default=False, help='Source and destination dir values are base64 encoded.' )
(options, args) = parser.parse_args()

def main(options, args):
is_url = bool( options.is_url )
is_file = bool( options.is_file )
archive_source, dest_dir = args
Expand All @@ -66,14 +78,25 @@ def unpack_archive( archive_file, dest_dir ):
archive_source = b64decode( archive_source )
dest_dir = b64decode( dest_dir )

# Get archive from URL.
if is_url:
archive_file = url_to_file( archive_source, tempfile.NamedTemporaryFile( dir=dest_dir ).name )
elif is_file:
archive_file = archive_source

# Unpack archive.
check_archive( archive_file, dest_dir )
unpack_archive( archive_file, dest_dir )


if __name__ == "__main__":
# Parse command line.
parser = optparse.OptionParser()
parser.add_option( '-U', '--url', dest='is_url', action="store_true", help='Source is a URL.' )
parser.add_option( '-F', '--file', dest='is_file', action="store_true", help='Source is a URL.' )
parser.add_option( '-e', '--encoded', dest='is_b64encoded', action="store_true", default=False, help='Source and destination dir values are base64 encoded.' )
(options, args) = parser.parse_args()
try:
# Get archive from URL.
if is_url:
archive_file = url_to_file( archive_source, tempfile.NamedTemporaryFile( dir=dest_dir ).name )
elif is_file:
archive_file = archive_source

# Unpack archive.
unpack_archive( archive_file, dest_dir )
main(options, args)
except Exception, e:
print "Error unpacking tar/gz archive: %s" % e, sys.stderr

0 comments on commit 8736c7b

Please sign in to comment.