Skip to content

Commit

Permalink
Security fixes for tool shed hg push and capsule/tarball uploads
Browse files Browse the repository at this point in the history
  • Loading branch information
natefoo committed Feb 24, 2016
1 parent 4f3397f commit 4845a39
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 59 deletions.
21 changes: 19 additions & 2 deletions lib/galaxy/webapps/tool_shed/framework/middleware/hg.py
Expand Up @@ -9,7 +9,7 @@
from paste.httpheaders import AUTH_TYPE
from paste.httpheaders import REMOTE_USER

from galaxy.util import asbool
from galaxy.util import asbool, safe_relpath
from galaxy.util.hash_util import new_secure_hash
from tool_shed.util import hg_util
import tool_shed.repository_types.util as rt_util
Expand Down Expand Up @@ -113,7 +113,11 @@ def __call__( self, environ, start_response ):
fh.write( chunk )
fh.close()
fh = open( tmp_filename, 'rb' )
changeset_groups = json.loads( hg_util.bundle_to_json( fh ) )
try:
changeset_groups = json.loads( hg_util.bundle_to_json( fh ) )
except AttributeError:
msg = 'Your version of Mercurial is not supported. Please use a version < 3.5'
return self.__display_exception_remotely( start_response, msg )
fh.close()
try:
os.unlink( tmp_filename )
Expand All @@ -122,6 +126,19 @@ def __call__( self, environ, start_response ):
if changeset_groups:
# Check the repository type to make sure inappropriate files are not being pushed.
if 'PATH_INFO' in environ:
# Ensure there are no symlinks with targets outside the repo
for entry in changeset_groups:
if len( entry ) == 2:
filename, change_list = entry
if not isinstance(change_list, list):
change_list = [change_list]
for change in change_list:
for patch in change['data']:
target = patch['block'].strip()
if ( ( patch['end'] - patch['start'] == 0 ) and not safe_relpath( target ) ):
msg = "Changes include a symlink outside of the repository: %s -> %s" % ( filename, target )
log.warning( msg )
return self.__display_exception_remotely( start_response, msg )
# Instantiate a database connection
engine = sqlalchemy.create_engine( self.db_url )
connection = engine.connect()
Expand Down
57 changes: 36 additions & 21 deletions lib/tool_shed/capsule/capsule_manager.py
Expand Up @@ -12,6 +12,7 @@
from galaxy.model.orm import and_
from galaxy.util import asbool
from galaxy.util import CHUNK_SIZE
from galaxy.util import safe_relpath
from galaxy.util.odict import odict

from tool_shed.dependencies.repository.relation_builder import RelationBuilder
Expand Down Expand Up @@ -727,29 +728,16 @@ def import_repository_archive( self, repository, repository_archive_dict ):
repo = hg_util.get_repo_for_repository( self.app, repository=None, repo_path=repo_dir, create=False )
undesirable_dirs_removed = 0
undesirable_files_removed = 0
ok, error_message = commit_util.check_archive( repository, archive )
if ok:
check_results = commit_util.check_archive( repository, archive )
# We filter out undesirable files but fail on undesriable dirs. Not
# sure why, just trying to maintain the same behavior as before. -nate
if not check_results.invalid and not check_results.undesirable_dirs:
full_path = os.path.abspath( repo_dir )
filenames_in_archive = []
for tarinfo_obj in archive.getmembers():
# Check files and directories in the archive.
ok = os.path.basename( tarinfo_obj.name ) not in commit_util.UNDESIRABLE_FILES
if ok:
for file_path_item in tarinfo_obj.name.split( '/' ):
if file_path_item in commit_util.UNDESIRABLE_DIRS:
undesirable_dirs_removed += 1
error_message = 'Import failed: invalid file path <b>%s</b> in archive <b>%s</b>' % \
( str( file_path_item ), str( archive_file_name ) )
results_dict[ 'ok' ] = False
results_dict[ 'error_message' ] += error_message
return results_dict
filenames_in_archive.append( tarinfo_obj.name )
else:
undesirable_files_removed += 1
# Extract the uploaded archive to the repository root.
archive.extractall( path=full_path )
archive.extractall( path=full_path, members=check_results.valid )
archive.close()
for filename in filenames_in_archive:
for tar_member in check_results.valid:
filename = tar_member.name
uploaded_file_name = os.path.join( full_path, filename )
if os.path.split( uploaded_file_name )[ -1 ] == rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
# Inspect the contents of the file to see if toolshed or changeset_revision attributes
Expand All @@ -776,6 +764,9 @@ def import_repository_archive( self, repository, repository_archive_dict ):
new_repo_alert = True
# Since the repository is new, the following must be False.
remove_repo_files_not_in_tar = False
filenames_in_archive = [ member.name for member in check_results.valid ]
undesirable_files_removed = len( check_results.undesirable_files )
undesirable_dirs_removed = 0
ok, error_message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
commit_util.handle_directory_changes( self.app,
self.host,
Expand Down Expand Up @@ -806,7 +797,13 @@ def import_repository_archive( self, repository, repository_archive_dict ):
else:
archive.close()
results_dict[ 'ok' ] = False
results_dict[ 'error_message' ] += error_message
results_dict[ 'error_message' ] += 'Capsule errors were found: '
if check_results.invalid:
results_dict[ 'error_message' ] += '%s Invalid files were: %s.' % (
' '.join( check_results.errors ), ', '.join( check_results.invalid ) )
if check_results.undesirable_dirs:
results_dict[ 'error_message' ] += ' Undesirable directories were: %s.' % (
', '.join( check_results.undesirable_dirs ) )
return results_dict

def upload_capsule( self, **kwd ):
Expand Down Expand Up @@ -863,6 +860,12 @@ def upload_capsule( self, **kwd ):
return_dict[ 'status' ] = 'error'
uploaded_file.close()
return return_dict
if not self.validate_archive_paths( tar_archive ):
return_dict[ 'status' ] = 'error'
return_dict[ 'message' ] = ( 'This capsule contains an invalid member type '
'or a file outside the archive path.' )
uploaded_file.close()
return return_dict
return_dict[ 'tar_archive' ] = tar_archive
return_dict[ 'capsule_file_name' ] = uploaded_file_filename
uploaded_file.close()
Expand All @@ -872,6 +875,18 @@ def upload_capsule( self, **kwd ):
return return_dict
return return_dict

def validate_archive_paths( self, tar_archive ):
'''
Inspect the archive contents to ensure that there are no risky symlinks.
Returns True if a suspicious path is found.
'''
for member in tar_archive.getmembers():
if not ( member.isdir() or member.isfile() or member.islnk() ):
return False
elif not safe_relpath( member.name ):
return False
return True

def validate_capsule( self, **kwd ):
"""
Inspect the uploaded capsule's manifest and its contained files to ensure it is a valid
Expand Down
54 changes: 35 additions & 19 deletions lib/tool_shed/util/commit_util.py
Expand Up @@ -4,8 +4,10 @@
import os
import shutil
import tempfile
from collections import namedtuple

from galaxy.datatypes import checkers
from galaxy.util import safe_relpath

from tool_shed.tools import data_table_manager

Expand All @@ -21,30 +23,44 @@
UNDESIRABLE_FILES = [ '.hg_archival.txt', 'hgrc', '.DS_Store' ]

def check_archive( repository, archive ):
valid = []
invalid = []
errors = []
undesirable_files = []
undesirable_dirs = []
for member in archive.getmembers():
# Allow regular files and directories only
if not ( member.isdir() or member.isfile() or member.islnk() ):
message = "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc). "
message += "The problematic member in this archive is %s," % str( member.name )
return False, message
for item in [ '.hg', '..', '/' ]:
if member.name.startswith( item ):
message = "Uploaded archives cannot contain .hg directories, absolute filenames starting with '/', or filenames with two dots '..'. "
message += "The problematic member in this archive is %s." % str( member.name )
return False, message
if member.name in [ 'hgrc' ]:
message = "Uploaded archives cannot contain hgrc files. "
message += "The problematic member in this archive is %s." % str( member.name )
return False, message
errors.append( "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)." )
invalid.append( member )
continue
if not safe_relpath( member.name ):
errors.append( "Uploaded archives cannot contain files that would extract outside of the archive." )
invalid.append( member )
continue
if os.path.basename( member.name ) in UNDESIRABLE_FILES:
undesirable_files.append( member )
continue
head = tail = member.name
try:
while tail:
head, tail = os.path.split(head)
if tail in UNDESIRABLE_DIRS:
undesirable_dirs.append( member )
assert False
except AssertionError:
continue
if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
message = 'Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.'
message += 'This archive contains a member named %s.' % str( member.name )
return False, message
errors.append( 'Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.' )
invalid.append( member )
continue
if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
message = 'Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.'
message += 'This archive contains a member named %s.' % str( member.name )
return False, message
return True, ''
errors.append( 'Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.' )
invalid.append( member )
continue
valid.append( member )
ArchiveCheckResults = namedtuple( 'ArchiveCheckResults', [ 'valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors' ] )
return ArchiveCheckResults( valid, invalid, undesirable_files, undesirable_dirs, errors )

def check_file_contents_for_email_alerts( app ):
"""
Expand Down
26 changes: 9 additions & 17 deletions lib/tool_shed/util/repository_content_util.py
Expand Up @@ -15,31 +15,23 @@ def upload_tar( trans, rdah, tdah, repository, tar, uploaded_file, upload_point,
hg_util.get_repo_for_repository( trans.app, repository=None, repo_path=repo_dir, create=False )
undesirable_dirs_removed = 0
undesirable_files_removed = 0
ok, message = commit_util.check_archive( repository, tar )
if not ok:
check_results = commit_util.check_archive( repository, tar )
if check_results.invalid:
tar.close()
uploaded_file.close()
return ok, message, [], '', undesirable_dirs_removed, undesirable_files_removed
message = '%s Invalid paths were: %s' % (
' '.join( check_results.errors ), ', '.join( check_results.invalid ) )
return False, message, [], '', undesirable_dirs_removed, undesirable_files_removed
else:
if upload_point is not None:
full_path = os.path.abspath( os.path.join( repo_dir, upload_point ) )
else:
full_path = os.path.abspath( repo_dir )
filenames_in_archive = []
for tarinfo_obj in tar.getmembers():
ok = os.path.basename( tarinfo_obj.name ) not in commit_util.UNDESIRABLE_FILES
if ok:
for file_path_item in tarinfo_obj.name.split( '/' ):
if file_path_item in commit_util.UNDESIRABLE_DIRS:
undesirable_dirs_removed += 1
ok = False
break
else:
undesirable_files_removed += 1
if ok:
filenames_in_archive.append( tarinfo_obj.name )
undesirable_files_removed = len( check_results.undesirable_files )
undesirable_dirs_removed = len( check_results.undesirable_dirs )
filenames_in_archive = [ ti.name for ti in check_results.valid ]
# Extract the uploaded tar to the load_point within the repository hierarchy.
tar.extractall( path=full_path )
tar.extractall( path=full_path, members=check_results.valid )
tar.close()
uploaded_file.close()
for filename in filenames_in_archive:
Expand Down

0 comments on commit 4845a39

Please sign in to comment.