Skip to content

Commit

Permalink
Merge pull request #6577 from bernt-matthias/topic/grouping-wreplace
Browse files Browse the repository at this point in the history
improved grouping tool
  • Loading branch information
jmchilton committed Dec 12, 2018
2 parents c6fc4dc + 7b027d8 commit d06718e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 8 deletions.
37 changes: 33 additions & 4 deletions tools/stats/grouping.py
Expand Up @@ -3,6 +3,13 @@
# Refactored 2011 to use numpy instead of rpy, Kanwei Li
"""
This tool provides the SQL "group by" functionality.
Arguments:
1 output file name
2 input file name
3 grouping column
4 ignore case (1/0)
5 ascii to delete (comma separated list)
6... op,col,do_round,default
"""
from __future__ import print_function

Expand All @@ -15,6 +22,22 @@
import numpy


def float_wdefault(s, d, c):
    """
    Convert the list of strings s into a list of floats, in place.

    Entries that cannot be converted are replaced by d if d is not None;
    otherwise the script is aborted through stop_err with a message that
    names the offending value and the column c.

    s -- list of strings; overwritten element by element
    d -- replacement for non-convertible entries, or None for "no replacement"
    c -- column number, used only in the error message

    Returns the same list object s, now holding the converted values.
    """
    for i, value in enumerate(s):
        try:
            s[i] = float(value)
        except ValueError:
            if d is not None:
                # caller supplied a default: substitute it for the bad entry
                s[i] = d
            else:
                stop_err("non float value '%s' found in column %d" % (value, c))
    return s


def stop_err(msg):
    """
    Write msg to stderr and terminate the script with exit status 1.

    msg -- text written to stderr unchanged (no newline is appended)

    Raises SystemExit; does not return.
    """
    sys.stderr.write(msg)
    # a bare sys.exit() would report status 0 (success) despite the error
    sys.exit(1)
Expand All @@ -38,7 +61,9 @@ def main():
ops = []
cols = []
round_val = []
default_val = []

# remove comment lines
if sys.argv[5] != "None":
asciitodelete = sys.argv[5]
if asciitodelete:
Expand All @@ -55,25 +80,29 @@ def main():
newfile.close()
inputfile = newinputfile

# get operations and options in separate arrays
for var in sys.argv[6:]:
op, col, do_round = var.split()
op, col, do_round, default = var.split(',')
ops.append(op)
cols.append(col)
round_val.append(do_round)
default_val.append(float(default) if default != '' else None)

"""
At this point, ops, cols, round_val and default_val will look something like this:
ops: ['mean', 'min', 'c']
cols: ['1', '3', '4']
round_val: ['no', 'yes', 'no']
default_val: [0.0, 1.0, None]
"""

try:
group_col = int(sys.argv[3]) - 1
except Exception:
stop_err("Group column not specified.")

# sort file into a temporary file
tmpfile = tempfile.NamedTemporaryFile(mode='r')

try:
"""
The -k option for the Posix sort command is as follows:
Expand Down Expand Up @@ -112,7 +141,7 @@ def is_new_item(line):
out_str = key

for line in line_list:
fields = line.strip().split("\t")
fields = line.split("\t")
for i, col in enumerate(cols):
col = int(col) - 1 # cXX from galaxy is 1-based
try:
Expand Down Expand Up @@ -141,7 +170,7 @@ def is_new_item(line):
else:
# some kind of numpy fn
try:
data = [float(_) for _ in data]
data = float_wdefault(data, default_val[i], col + 1)
except ValueError:
sys.stderr.write("Operation %s expected number values but got %s instead.\n" % (op, data))
sys.exit(1)
Expand Down
7 changes: 3 additions & 4 deletions tools/stats/grouping.xml
@@ -1,4 +1,4 @@
<tool id="Grouping1" name="Group" version="2.1.1">
<tool id="Grouping1" name="Group" version="2.1.2">
<description>data by a column and perform aggregate operation on other columns.</description>
<command interpreter="python">
grouping.py
Expand All @@ -8,9 +8,7 @@
"${ignorecase}"
"${ignorelines}"
#for $op in $operations
'${op.optype}
${op.opcol}
${op.opround}'
'${op.optype},${op.opcol},${op.opround},${op.opdefault}'
#end for
</command>
<inputs>
Expand Down Expand Up @@ -57,6 +55,7 @@
<option value="no">NO</option>
<option value="yes">YES</option>
</param>
<param name="opdefault" type="float" optional="true" label="Replace non numeric data" help="leave empty for no replacements. Will replace, e.g., empty cells and text cells."/>
</repeat>
</inputs>
<outputs>
Expand Down

0 comments on commit d06718e

Please sign in to comment.