From 4845a39071ab4e2652e564efdf7647a942563603 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Wed, 24 Feb 2016 11:18:19 -0500 Subject: [PATCH] Security fixes for tool shed hg push and capsule/tarball uploads --- .../tool_shed/framework/middleware/hg.py | 21 ++++++- lib/tool_shed/capsule/capsule_manager.py | 57 ++++++++++++------- lib/tool_shed/util/commit_util.py | 54 +++++++++++------- lib/tool_shed/util/repository_content_util.py | 26 +++------ 4 files changed, 99 insertions(+), 59 deletions(-) diff --git a/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py b/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py index 14745cb6446d..f07f017837cd 100644 --- a/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py +++ b/lib/galaxy/webapps/tool_shed/framework/middleware/hg.py @@ -9,7 +9,7 @@ from paste.httpheaders import AUTH_TYPE from paste.httpheaders import REMOTE_USER -from galaxy.util import asbool +from galaxy.util import asbool, safe_relpath from galaxy.util.hash_util import new_secure_hash from tool_shed.util import hg_util import tool_shed.repository_types.util as rt_util @@ -113,7 +113,11 @@ def __call__( self, environ, start_response ): fh.write( chunk ) fh.close() fh = open( tmp_filename, 'rb' ) - changeset_groups = json.loads( hg_util.bundle_to_json( fh ) ) + try: + changeset_groups = json.loads( hg_util.bundle_to_json( fh ) ) + except AttributeError: + msg = 'Your version of Mercurial is not supported. Please use a version < 3.5' + return self.__display_exception_remotely( start_response, msg ) fh.close() try: os.unlink( tmp_filename ) @@ -122,6 +126,19 @@ def __call__( self, environ, start_response ): if changeset_groups: # Check the repository type to make sure inappropriate files are not being pushed. if 'PATH_INFO' in environ: + # Ensure there are no symlinks with targets outside the repo + for entry in changeset_groups: + if len( entry ) == 2: + filename, change_list = entry + if not isinstance(change_list, list): + change_list = [change_list] + for change in change_list: + for patch in change['data']: + target = patch['block'].strip() + if ( ( patch['end'] - patch['start'] == 0 ) and not safe_relpath( target ) ): + msg = "Changes include a symlink outside of the repository: %s -> %s" % ( filename, target ) + log.warning( msg ) + return self.__display_exception_remotely( start_response, msg ) # Instantiate a database connection engine = sqlalchemy.create_engine( self.db_url ) connection = engine.connect() diff --git a/lib/tool_shed/capsule/capsule_manager.py b/lib/tool_shed/capsule/capsule_manager.py index 15b6fb43d87c..642b04f33d93 100644 --- a/lib/tool_shed/capsule/capsule_manager.py +++ b/lib/tool_shed/capsule/capsule_manager.py @@ -12,6 +12,7 @@ from galaxy.model.orm import and_ from galaxy.util import asbool from galaxy.util import CHUNK_SIZE +from galaxy.util import safe_relpath from galaxy.util.odict import odict from tool_shed.dependencies.repository.relation_builder import RelationBuilder @@ -727,29 +728,16 @@ def import_repository_archive( self, repository, repository_archive_dict ): repo = hg_util.get_repo_for_repository( self.app, repository=None, repo_path=repo_dir, create=False ) undesirable_dirs_removed = 0 undesirable_files_removed = 0 - ok, error_message = commit_util.check_archive( repository, archive ) - if ok: + check_results = commit_util.check_archive( repository, archive ) + # We filter out undesirable files but fail on undesriable dirs. Not + # sure why, just trying to maintain the same behavior as before. -nate + if not check_results.invalid and not check_results.undesirable_dirs: full_path = os.path.abspath( repo_dir ) - filenames_in_archive = [] - for tarinfo_obj in archive.getmembers(): - # Check files and directories in the archive. - ok = os.path.basename( tarinfo_obj.name ) not in commit_util.UNDESIRABLE_FILES - if ok: - for file_path_item in tarinfo_obj.name.split( '/' ): - if file_path_item in commit_util.UNDESIRABLE_DIRS: - undesirable_dirs_removed += 1 - error_message = 'Import failed: invalid file path %s in archive %s' % \ - ( str( file_path_item ), str( archive_file_name ) ) - results_dict[ 'ok' ] = False - results_dict[ 'error_message' ] += error_message - return results_dict - filenames_in_archive.append( tarinfo_obj.name ) - else: - undesirable_files_removed += 1 # Extract the uploaded archive to the repository root. - archive.extractall( path=full_path ) + archive.extractall( path=full_path, members=check_results.valid ) archive.close() - for filename in filenames_in_archive: + for tar_member in check_results.valid: + filename = tar_member.name uploaded_file_name = os.path.join( full_path, filename ) if os.path.split( uploaded_file_name )[ -1 ] == rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME: # Inspect the contents of the file to see if toolshed or changeset_revision attributes @@ -776,6 +764,9 @@ def import_repository_archive( self, repository, repository_archive_dict ): new_repo_alert = True # Since the repository is new, the following must be False. remove_repo_files_not_in_tar = False + filenames_in_archive = [ member.name for member in check_results.valid ] + undesirable_files_removed = len( check_results.undesirable_files ) + undesirable_dirs_removed = 0 ok, error_message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \ commit_util.handle_directory_changes( self.app, self.host, @@ -806,7 +797,13 @@ def import_repository_archive( self, repository, repository_archive_dict ): else: archive.close() results_dict[ 'ok' ] = False - results_dict[ 'error_message' ] += error_message + results_dict[ 'error_message' ] += 'Capsule errors were found: ' + if check_results.invalid: + results_dict[ 'error_message' ] += '%s Invalid files were: %s.' % ( + ' '.join( check_results.errors ), ', '.join( check_results.invalid ) ) + if check_results.undesirable_dirs: + results_dict[ 'error_message' ] += ' Undesirable directories were: %s.' % ( + ', '.join( check_results.undesirable_dirs ) ) return results_dict def upload_capsule( self, **kwd ): @@ -863,6 +860,12 @@ def upload_capsule( self, **kwd ): return_dict[ 'status' ] = 'error' uploaded_file.close() return return_dict + if not self.validate_archive_paths( tar_archive ): + return_dict[ 'status' ] = 'error' + return_dict[ 'message' ] = ( 'This capsule contains an invalid member type ' + 'or a file outside the archive path.' ) + uploaded_file.close() + return return_dict return_dict[ 'tar_archive' ] = tar_archive return_dict[ 'capsule_file_name' ] = uploaded_file_filename uploaded_file.close() @@ -872,6 +875,18 @@ def upload_capsule( self, **kwd ): return return_dict return return_dict + def validate_archive_paths( self, tar_archive ): + ''' + Inspect the archive contents to ensure that there are no risky symlinks. + Returns True if a suspicious path is found. + ''' + for member in tar_archive.getmembers(): + if not ( member.isdir() or member.isfile() or member.islnk() ): + return False + elif not safe_relpath( member.name ): + return False + return True + def validate_capsule( self, **kwd ): """ Inspect the uploaded capsule's manifest and its contained files to ensure it is a valid diff --git a/lib/tool_shed/util/commit_util.py b/lib/tool_shed/util/commit_util.py index 4b8d9680f686..cfefbb79ae22 100644 --- a/lib/tool_shed/util/commit_util.py +++ b/lib/tool_shed/util/commit_util.py @@ -4,8 +4,10 @@ import os import shutil import tempfile +from collections import namedtuple from galaxy.datatypes import checkers +from galaxy.util import safe_relpath from tool_shed.tools import data_table_manager @@ -21,30 +23,44 @@ UNDESIRABLE_FILES = [ '.hg_archival.txt', 'hgrc', '.DS_Store' ] def check_archive( repository, archive ): + valid = [] + invalid = [] + errors = [] + undesirable_files = [] + undesirable_dirs = [] for member in archive.getmembers(): # Allow regular files and directories only if not ( member.isdir() or member.isfile() or member.islnk() ): - message = "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc). " - message += "The problematic member in this archive is %s," % str( member.name ) - return False, message - for item in [ '.hg', '..', '/' ]: - if member.name.startswith( item ): - message = "Uploaded archives cannot contain .hg directories, absolute filenames starting with '/', or filenames with two dots '..'. " - message += "The problematic member in this archive is %s." % str( member.name ) - return False, message - if member.name in [ 'hgrc' ]: - message = "Uploaded archives cannot contain hgrc files. " - message += "The problematic member in this archive is %s." % str( member.name ) - return False, message + errors.append( "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)." ) + invalid.append( member ) + continue + if not safe_relpath( member.name ): + errors.append( "Uploaded archives cannot contain files that would extract outside of the archive." ) + invalid.append( member ) + continue + if os.path.basename( member.name ) in UNDESIRABLE_FILES: + undesirable_files.append( member ) + continue + head = tail = member.name + try: + while tail: + head, tail = os.path.split(head) + if tail in UNDESIRABLE_DIRS: + undesirable_dirs.append( member ) + assert False + except AssertionError: + continue if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME: - message = 'Repositories of type Repository suite definition can contain only a single file named repository_dependencies.xml.' - message += 'This archive contains a member named %s.' % str( member.name ) - return False, message + errors.append( 'Repositories of type Repository suite definition can contain only a single file named repository_dependencies.xml.' ) + invalid.append( member ) + continue if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME: - message = 'Repositories of type Tool dependency definition can contain only a single file named tool_dependencies.xml.' - message += 'This archive contains a member named %s.' % str( member.name ) - return False, message - return True, '' + errors.append( 'Repositories of type Tool dependency definition can contain only a single file named tool_dependencies.xml.' ) + invalid.append( member ) + continue + valid.append( member ) + ArchiveCheckResults = namedtuple( 'ArchiveCheckResults', [ 'valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors' ] ) + return ArchiveCheckResults( valid, invalid, undesirable_files, undesirable_dirs, errors ) def check_file_contents_for_email_alerts( app ): """ diff --git a/lib/tool_shed/util/repository_content_util.py b/lib/tool_shed/util/repository_content_util.py index 99fc9793b833..1cf6660278c9 100644 --- a/lib/tool_shed/util/repository_content_util.py +++ b/lib/tool_shed/util/repository_content_util.py @@ -15,31 +15,23 @@ def upload_tar( trans, rdah, tdah, repository, tar, uploaded_file, upload_point, hg_util.get_repo_for_repository( trans.app, repository=None, repo_path=repo_dir, create=False ) undesirable_dirs_removed = 0 undesirable_files_removed = 0 - ok, message = commit_util.check_archive( repository, tar ) - if not ok: + check_results = commit_util.check_archive( repository, tar ) + if check_results.invalid: tar.close() uploaded_file.close() - return ok, message, [], '', undesirable_dirs_removed, undesirable_files_removed + message = '%s Invalid paths were: %s' % ( + ' '.join( check_results.errors ), ', '.join( check_results.invalid ) ) + return False, message, [], '', undesirable_dirs_removed, undesirable_files_removed else: if upload_point is not None: full_path = os.path.abspath( os.path.join( repo_dir, upload_point ) ) else: full_path = os.path.abspath( repo_dir ) - filenames_in_archive = [] - for tarinfo_obj in tar.getmembers(): - ok = os.path.basename( tarinfo_obj.name ) not in commit_util.UNDESIRABLE_FILES - if ok: - for file_path_item in tarinfo_obj.name.split( '/' ): - if file_path_item in commit_util.UNDESIRABLE_DIRS: - undesirable_dirs_removed += 1 - ok = False - break - else: - undesirable_files_removed += 1 - if ok: - filenames_in_archive.append( tarinfo_obj.name ) + undesirable_files_removed = len( check_results.undesirable_files ) + undesirable_dirs_removed = len( check_results.undesirable_dirs ) + filenames_in_archive = [ ti.name for ti in check_results.valid ] # Extract the uploaded tar to the load_point within the repository hierarchy. - tar.extractall( path=full_path ) + tar.extractall( path=full_path, members=check_results.valid ) tar.close() uploaded_file.close() for filename in filenames_in_archive: