From c1e3087ca35dbca1b0328954fe4769d666d3f934 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Thu, 15 Dec 2016 12:34:18 -0500 Subject: [PATCH] Fix an ACE vulnerability with gff filter tools. Thanks to David Wyde for the disclosure. --- tools/filters/gff/gff_filter_by_attribute.py | 152 ++++++++++++++++++ tools/filters/gff/gff_filter_by_attribute.xml | 2 +- .../gff/gff_filter_by_feature_count.py | 93 ++++++++++- .../gff/gff_filter_by_feature_count.xml | 2 +- 4 files changed, 246 insertions(+), 3 deletions(-) diff --git a/tools/filters/gff/gff_filter_by_attribute.py b/tools/filters/gff/gff_filter_by_attribute.py index 6b8a083ae10d..2eafa251987a 100644 --- a/tools/filters/gff/gff_filter_by_attribute.py +++ b/tools/filters/gff/gff_filter_by_attribute.py @@ -6,6 +6,7 @@ from __future__ import division import sys + from galaxy import eggs from galaxy.util.json import dumps, loads @@ -17,6 +18,152 @@ assert sys.version_info[:2] >= ( 2, 4 ) +from json import loads +from ast import Module, parse, walk + +AST_NODE_TYPE_WHITELIST = [ + 'Expr', 'Load', 'Str', 'Num', 'BoolOp', 'Compare', 'And', 'Eq', 'NotEq', + 'Or', 'GtE', 'LtE', 'Lt', 'Gt', 'BinOp', 'Add', 'Div', 'Sub', 'Mult', 'Mod', + 'Pow', 'LShift', 'GShift', 'BitAnd', 'BitOr', 'BitXor', 'UnaryOp', 'Invert', + 'Not', 'NotIn', 'In', 'Is', 'IsNot', 'List', 'Index', 'Subscript', + 'Name', +] + + +BUILTIN_AND_MATH_FUNCTIONS = 'abs|all|any|bin|chr|cmp|complex|divmod|float|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|exp|sqrt|ceil|floor'.split('|') +STRING_AND_LIST_METHODS = [ name for name in dir('') + dir([]) if not name.startswith('_') ] +VALID_FUNCTIONS = BUILTIN_AND_MATH_FUNCTIONS + STRING_AND_LIST_METHODS +# Name blacklist isn't strictly needed - but provides extra peace of mind. +NAME_BLACKLIST = ["exec", "eval", "globals", "locals", "__import__", "__builtins__"] + + +def __check_name( ast_node ): + name = ast_node.id + return name not in NAME_BLACKLIST + + +def check_simple_name( text ): + """ + + >>> check_simple_name("col_name") + True + >>> check_simple_name("c1=='chr1' and c3-c2>=2000 and c6=='+'") + False + >>> check_simple_name("eval('1+1')") + False + >>> check_simple_name("import sys") + False + >>> check_simple_name("[].__str__") + False + >>> check_simple_name("__builtins__") + False + >>> check_simple_name("'x' in globals") + False + >>> check_simple_name("'x' in [1,2,3]") + False + >>> check_simple_name("c3=='chr1' and c5>5") + False + >>> check_simple_name("c3=='chr1' and d5>5") + False + >>> check_simple_name("c3=='chr1' and c5>5 or exec") + False + >>> check_simple_name("type(c1) != type(1)") + False + >>> check_simple_name("c1.split(',')[1] == '1'") + False + >>> check_simple_name("exec 1") + False + >>> check_simple_name("str(c2) in [\\\"a\\\",\\\"b\\\"]") + False + >>> check_simple_name("__import__('os').system('touch /tmp/OOPS')") + False + """ + try: + module = parse( text ) + except SyntaxError: + return False + + if not isinstance(module, Module): + return False + statements = module.body + if not len( statements ) == 1: + return False + expression = statements[0] + if expression.__class__.__name__ != 'Expr': + return False + + for ast_node in walk( expression ): + ast_node_class = ast_node.__class__.__name__ + if ast_node_class not in ["Expr", "Name", "Load"]: + return False + + if ast_node_class == "Name" and not __check_name(ast_node): + return False + + return True + + +def check_expression( text ): + """ + + >>> check_expression("c1=='chr1' and c3-c2>=2000 and c6=='+'") + True + >>> check_expression("eval('1+1')") + False + >>> check_expression("import sys") + False + >>> check_expression("[].__str__") + False + >>> check_expression("__builtins__") + False + >>> check_expression("'x' in globals") + False + >>> check_expression("'x' in [1,2,3]") + True + >>> check_expression("c3=='chr1' and c5>5") + True + >>> check_expression("c3=='chr1' and d5>5") + True + >>> check_expression("c3=='chr1' and c5>5 or exec") + False + >>> check_expression("type(c1) != type(1)") + False + >>> check_expression("c1.split(',')[1] == '1'") + False + >>> check_expression("exec 1") + False + >>> check_expression("str(c2) in [\\\"a\\\",\\\"b\\\"]") + False + >>> check_expression("__import__('os').system('touch /tmp/OOPS')") + False + """ + try: + module = parse( text ) + except SyntaxError: + return False + + if not isinstance(module, Module): + return False + statements = module.body + if not len( statements ) == 1: + return False + expression = statements[0] + if expression.__class__.__name__ != 'Expr': + return False + + for ast_node in walk( expression ): + ast_node_class = ast_node.__class__.__name__ + + # Toss out everything that is not a "simple" expression, + # imports, error handling, etc... + if ast_node_class not in AST_NODE_TYPE_WHITELIST: + return False + + if ast_node_class == "Name" and not __check_name(ast_node): + return False + + return True + # # Helper functions. # @@ -57,6 +204,8 @@ def check_for_executable( text, description='' ): # Convert types from str to type objects. for name, a_type in attribute_types.items(): check_for_executable(a_type) + if not check_simple_name( a_type ): + stop_err("Problem with attribute type [%s]" % a_type) attribute_types[ name ] = eval( a_type ) # Unescape if input has been escaped @@ -76,6 +225,9 @@ def check_for_executable( text, description='' ): # Attempt to determine if the condition includes executable stuff and, if so, exit. check_for_executable( cond_text, 'condition') +if not check_expression(cond_text): + stop_err( "Illegal/invalid in condition '%s'" % ( cond_text ) ) + # Prepare the column variable names and wrappers for column data types. Only # prepare columns up to largest column in condition. attrs, type_casts = [], [] diff --git a/tools/filters/gff/gff_filter_by_attribute.xml b/tools/filters/gff/gff_filter_by_attribute.xml index 4e64b84126ea..32a7376687b0 100644 --- a/tools/filters/gff/gff_filter_by_attribute.xml +++ b/tools/filters/gff/gff_filter_by_attribute.xml @@ -1,4 +1,4 @@ - + using simple expressions gff_filter_by_attribute.py $input $out_file1 "$cond" '${input.metadata.attribute_types}' diff --git a/tools/filters/gff/gff_filter_by_feature_count.py b/tools/filters/gff/gff_filter_by_feature_count.py index 6dd648fe4dea..140d5b8f9770 100644 --- a/tools/filters/gff/gff_filter_by_feature_count.py +++ b/tools/filters/gff/gff_filter_by_feature_count.py @@ -8,8 +8,94 @@ import sys from galaxy import eggs from galaxy.datatypes.util.gff_util import GFFReaderWrapper + from bx.intervals.io import GenomicInterval +from ast import Module, parse, walk + +AST_NODE_TYPE_WHITELIST = [ + 'Expr', 'Load', 'Str', 'Num', 'BoolOp', 'Compare', 'And', 'Eq', 'NotEq', + 'Or', 'GtE', 'LtE', 'Lt', 'Gt', 'BinOp', 'Add', 'Div', 'Sub', 'Mult', 'Mod', + 'Pow', 'LShift', 'GShift', 'BitAnd', 'BitOr', 'BitXor', 'UnaryOp', 'Invert', + 'Not', 'NotIn', 'In', 'Is', 'IsNot', 'List', 'Index', 'Subscript', + 'Name', +] + + +BUILTIN_AND_MATH_FUNCTIONS = 'abs|all|any|bin|chr|cmp|complex|divmod|float|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|exp|sqrt|ceil|floor'.split('|') +STRING_AND_LIST_METHODS = [ name for name in dir('') + dir([]) if not name.startswith('_') ] +VALID_FUNCTIONS = BUILTIN_AND_MATH_FUNCTIONS + STRING_AND_LIST_METHODS +# Name blacklist isn't strictly needed - but provides extra peace of mind. +NAME_BLACKLIST = ["exec", "eval", "globals", "locals", "__import__", "__builtins__"] + + +def __check_name( ast_node ): + name = ast_node.id + return name not in NAME_BLACKLIST + + +def check_expression( text ): + """ + + >>> check_expression("c1=='chr1' and c3-c2>=2000 and c6=='+'") + True + >>> check_expression("eval('1+1')") + False + >>> check_expression("import sys") + False + >>> check_expression("[].__str__") + False + >>> check_expression("__builtins__") + False + >>> check_expression("'x' in globals") + False + >>> check_expression("'x' in [1,2,3]") + True + >>> check_expression("c3=='chr1' and c5>5") + True + >>> check_expression("c3=='chr1' and d5>5") + True + >>> check_expression("c3=='chr1' and c5>5 or exec") + False + >>> check_expression("type(c1) != type(1)") + False + >>> check_expression("c1.split(',')[1] == '1'") + False + >>> check_expression("exec 1") + False + >>> check_expression("str(c2) in [\\\"a\\\",\\\"b\\\"]") + False + >>> check_expression("__import__('os').system('touch /tmp/OOPS')") + False + """ + try: + module = parse( text ) + except SyntaxError: + return False + + if not isinstance(module, Module): + return False + statements = module.body + if not len( statements ) == 1: + return False + expression = statements[0] + if expression.__class__.__name__ != 'Expr': + return False + + for ast_node in walk( expression ): + ast_node_class = ast_node.__class__.__name__ + + # Toss out everything that is not a "simple" expression, + # imports, error handling, etc... + if ast_node_class not in AST_NODE_TYPE_WHITELIST: + return False + + if ast_node_class == "Name" and not __check_name(ast_node): + return False + + return True + + # Valid operators, ordered so that complex operators (e.g. '>=') are # recognized before simple operators (e.g. '>') ops = [ @@ -68,7 +154,12 @@ def __main__(): for interval in feature.intervals: if interval.feature == feature_name: count += 1 - if eval( '%s %s' % ( count, condition ) ): + eval_text = '%s %s' % ( count, condition ) + if not check_expression(eval_text): + print >> sys.stderr, "Invalid condition: %s, cannot filter." % condition + sys.exit(1) + + if eval(eval_text): # Keep feature. for interval in feature.intervals: out.write( "\t".join(interval.fields) + '\n' ) diff --git a/tools/filters/gff/gff_filter_by_feature_count.xml b/tools/filters/gff/gff_filter_by_feature_count.xml index 90fbd87c12fc..fdfc2e297503 100644 --- a/tools/filters/gff/gff_filter_by_feature_count.xml +++ b/tools/filters/gff/gff_filter_by_feature_count.xml @@ -1,4 +1,4 @@ - + using simple expressions gff_filter_by_feature_count.py $input_file1 $out_file1 "$feature_name" "$cond"