Skip to content

Commit

Permalink
Merge branch 'release_15.05' into release_15.07
Browse files Browse the repository at this point in the history
  • Loading branch information
natefoo committed Dec 15, 2016
2 parents 30608c1 + 7c443ed commit 790e60a
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 3 deletions.
152 changes: 152 additions & 0 deletions tools/filters/gff/gff_filter_by_attribute.py
Expand Up @@ -6,6 +6,7 @@

from __future__ import division
import sys

from galaxy import eggs
from galaxy.util.json import dumps, loads

Expand All @@ -17,6 +18,152 @@

assert sys.version_info[:2] >= ( 2, 4 )

from json import loads
from ast import Module, parse, walk

AST_NODE_TYPE_WHITELIST = [
'Expr', 'Load', 'Str', 'Num', 'BoolOp', 'Compare', 'And', 'Eq', 'NotEq',
'Or', 'GtE', 'LtE', 'Lt', 'Gt', 'BinOp', 'Add', 'Div', 'Sub', 'Mult', 'Mod',
'Pow', 'LShift', 'GShift', 'BitAnd', 'BitOr', 'BitXor', 'UnaryOp', 'Invert',
'Not', 'NotIn', 'In', 'Is', 'IsNot', 'List', 'Index', 'Subscript',
'Name',
]


BUILTIN_AND_MATH_FUNCTIONS = 'abs|all|any|bin|chr|cmp|complex|divmod|float|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|exp|sqrt|ceil|floor'.split('|')
STRING_AND_LIST_METHODS = [ name for name in dir('') + dir([]) if not name.startswith('_') ]
VALID_FUNCTIONS = BUILTIN_AND_MATH_FUNCTIONS + STRING_AND_LIST_METHODS
# Name blacklist isn't strictly needed - but provides extra peace of mind.
NAME_BLACKLIST = ["exec", "eval", "globals", "locals", "__import__", "__builtins__"]


def __check_name( ast_node ):
name = ast_node.id
return name not in NAME_BLACKLIST


def check_simple_name( text ):
"""
>>> check_simple_name("col_name")
True
>>> check_simple_name("c1=='chr1' and c3-c2>=2000 and c6=='+'")
False
>>> check_simple_name("eval('1+1')")
False
>>> check_simple_name("import sys")
False
>>> check_simple_name("[].__str__")
False
>>> check_simple_name("__builtins__")
False
>>> check_simple_name("'x' in globals")
False
>>> check_simple_name("'x' in [1,2,3]")
False
>>> check_simple_name("c3=='chr1' and c5>5")
False
>>> check_simple_name("c3=='chr1' and d5>5")
False
>>> check_simple_name("c3=='chr1' and c5>5 or exec")
False
>>> check_simple_name("type(c1) != type(1)")
False
>>> check_simple_name("c1.split(',')[1] == '1'")
False
>>> check_simple_name("exec 1")
False
>>> check_simple_name("str(c2) in [\\\"a\\\",\\\"b\\\"]")
False
>>> check_simple_name("__import__('os').system('touch /tmp/OOPS')")
False
"""
try:
module = parse( text )
except SyntaxError:
return False

if not isinstance(module, Module):
return False
statements = module.body
if not len( statements ) == 1:
return False
expression = statements[0]
if expression.__class__.__name__ != 'Expr':
return False

for ast_node in walk( expression ):
ast_node_class = ast_node.__class__.__name__
if ast_node_class not in ["Expr", "Name", "Load"]:
return False

if ast_node_class == "Name" and not __check_name(ast_node):
return False

return True


def check_expression( text ):
"""
>>> check_expression("c1=='chr1' and c3-c2>=2000 and c6=='+'")
True
>>> check_expression("eval('1+1')")
False
>>> check_expression("import sys")
False
>>> check_expression("[].__str__")
False
>>> check_expression("__builtins__")
False
>>> check_expression("'x' in globals")
False
>>> check_expression("'x' in [1,2,3]")
True
>>> check_expression("c3=='chr1' and c5>5")
True
>>> check_expression("c3=='chr1' and d5>5")
True
>>> check_expression("c3=='chr1' and c5>5 or exec")
False
>>> check_expression("type(c1) != type(1)")
False
>>> check_expression("c1.split(',')[1] == '1'")
False
>>> check_expression("exec 1")
False
>>> check_expression("str(c2) in [\\\"a\\\",\\\"b\\\"]")
False
>>> check_expression("__import__('os').system('touch /tmp/OOPS')")
False
"""
try:
module = parse( text )
except SyntaxError:
return False

if not isinstance(module, Module):
return False
statements = module.body
if not len( statements ) == 1:
return False
expression = statements[0]
if expression.__class__.__name__ != 'Expr':
return False

for ast_node in walk( expression ):
ast_node_class = ast_node.__class__.__name__

# Toss out everything that is not a "simple" expression,
# imports, error handling, etc...
if ast_node_class not in AST_NODE_TYPE_WHITELIST:
return False

if ast_node_class == "Name" and not __check_name(ast_node):
return False

return True

#
# Helper functions.
#
Expand Down Expand Up @@ -57,6 +204,8 @@ def check_for_executable( text, description='' ):
# Convert types from str to type objects.
for name, a_type in attribute_types.items():
check_for_executable(a_type)
if not check_simple_name( a_type ):
stop_err("Problem with attribute type [%s]" % a_type)
attribute_types[ name ] = eval( a_type )

# Unescape if input has been escaped
Expand All @@ -76,6 +225,9 @@ def check_for_executable( text, description='' ):
# Attempt to determine if the condition includes executable stuff and, if so, exit.
check_for_executable( cond_text, 'condition')

if not check_expression(cond_text):
stop_err( "Illegal/invalid in condition '%s'" % ( cond_text ) )

# Prepare the column variable names and wrappers for column data types. Only
# prepare columns up to largest column in condition.
attrs, type_casts = [], []
Expand Down
2 changes: 1 addition & 1 deletion tools/filters/gff/gff_filter_by_attribute.xml
@@ -1,4 +1,4 @@
<tool id="gff_filter_by_attribute" name="Filter GFF data by attribute" version="0.1">
<tool id="gff_filter_by_attribute" name="Filter GFF data by attribute" version="0.1.1">
<description>using simple expressions</description>
<command interpreter="python">
gff_filter_by_attribute.py $input $out_file1 "$cond" '${input.metadata.attribute_types}'
Expand Down
93 changes: 92 additions & 1 deletion tools/filters/gff/gff_filter_by_feature_count.py
Expand Up @@ -8,8 +8,94 @@
import sys
from galaxy import eggs
from galaxy.datatypes.util.gff_util import GFFReaderWrapper

from bx.intervals.io import GenomicInterval

from ast import Module, parse, walk

AST_NODE_TYPE_WHITELIST = [
'Expr', 'Load', 'Str', 'Num', 'BoolOp', 'Compare', 'And', 'Eq', 'NotEq',
'Or', 'GtE', 'LtE', 'Lt', 'Gt', 'BinOp', 'Add', 'Div', 'Sub', 'Mult', 'Mod',
'Pow', 'LShift', 'GShift', 'BitAnd', 'BitOr', 'BitXor', 'UnaryOp', 'Invert',
'Not', 'NotIn', 'In', 'Is', 'IsNot', 'List', 'Index', 'Subscript',
'Name',
]


BUILTIN_AND_MATH_FUNCTIONS = 'abs|all|any|bin|chr|cmp|complex|divmod|float|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|exp|sqrt|ceil|floor'.split('|')
STRING_AND_LIST_METHODS = [ name for name in dir('') + dir([]) if not name.startswith('_') ]
VALID_FUNCTIONS = BUILTIN_AND_MATH_FUNCTIONS + STRING_AND_LIST_METHODS
# Name blacklist isn't strictly needed - but provides extra peace of mind.
NAME_BLACKLIST = ["exec", "eval", "globals", "locals", "__import__", "__builtins__"]


def __check_name( ast_node ):
name = ast_node.id
return name not in NAME_BLACKLIST


def check_expression( text ):
"""
>>> check_expression("c1=='chr1' and c3-c2>=2000 and c6=='+'")
True
>>> check_expression("eval('1+1')")
False
>>> check_expression("import sys")
False
>>> check_expression("[].__str__")
False
>>> check_expression("__builtins__")
False
>>> check_expression("'x' in globals")
False
>>> check_expression("'x' in [1,2,3]")
True
>>> check_expression("c3=='chr1' and c5>5")
True
>>> check_expression("c3=='chr1' and d5>5")
True
>>> check_expression("c3=='chr1' and c5>5 or exec")
False
>>> check_expression("type(c1) != type(1)")
False
>>> check_expression("c1.split(',')[1] == '1'")
False
>>> check_expression("exec 1")
False
>>> check_expression("str(c2) in [\\\"a\\\",\\\"b\\\"]")
False
>>> check_expression("__import__('os').system('touch /tmp/OOPS')")
False
"""
try:
module = parse( text )
except SyntaxError:
return False

if not isinstance(module, Module):
return False
statements = module.body
if not len( statements ) == 1:
return False
expression = statements[0]
if expression.__class__.__name__ != 'Expr':
return False

for ast_node in walk( expression ):
ast_node_class = ast_node.__class__.__name__

# Toss out everything that is not a "simple" expression,
# imports, error handling, etc...
if ast_node_class not in AST_NODE_TYPE_WHITELIST:
return False

if ast_node_class == "Name" and not __check_name(ast_node):
return False

return True


# Valid operators, ordered so that complex operators (e.g. '>=') are
# recognized before simple operators (e.g. '>')
ops = [
Expand Down Expand Up @@ -68,7 +154,12 @@ def __main__():
for interval in feature.intervals:
if interval.feature == feature_name:
count += 1
if eval( '%s %s' % ( count, condition ) ):
eval_text = '%s %s' % ( count, condition )
if not check_expression(eval_text):
print >> sys.stderr, "Invalid condition: %s, cannot filter." % condition
sys.exit(1)

if eval(eval_text):
# Keep feature.
for interval in feature.intervals:
out.write( "\t".join(interval.fields) + '\n' )
Expand Down
2 changes: 1 addition & 1 deletion tools/filters/gff/gff_filter_by_feature_count.xml
@@ -1,4 +1,4 @@
<tool id="gff_filter_by_feature_count" name="Filter GFF data by feature count" version="0.1">
<tool id="gff_filter_by_feature_count" name="Filter GFF data by feature count" version="0.1.1">
<description>using simple expressions</description>
<command interpreter="python">
gff_filter_by_feature_count.py $input_file1 $out_file1 "$feature_name" "$cond"
Expand Down

0 comments on commit 790e60a

Please sign in to comment.