Skip to content

Commit

Permalink
Added -q,--quiet option. Now detects short rows.
Browse files Browse the repository at this point in the history
  • Loading branch information
bl0b committed Sep 15, 2011
1 parent 2d52530 commit 21decbc
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
9 changes: 7 additions & 2 deletions csv_parser.py
@@ -1,12 +1,17 @@

import sys

class Csv(object):
class Row(tuple):
def __init__(self, a):
tuple.__init__(self, (x.strip() for x in a))
def __getitem__(self, x):
if type(x) in (tuple, list, set):
return Csv.Row((tuple.__getitem__(self, field) for field in x))
try:
return Csv.Row((tuple.__getitem__(self, field) for field in x))
except IndexError, ie:
print "Row is too short for ", x, ':', self
print "Aborting."
sys.exit(-1)
else:
return tuple.__getitem__(self, x)
def __init__(self, filename, sep='\t'):
Expand Down
16 changes: 10 additions & 6 deletions csv_xref.py
Expand Up @@ -14,6 +14,7 @@ def getopts(args):
parser.add_option("--rc", "--refcol", dest="refcol", help="columns indices in reference file that should be appended to rows of the input file", default=None)
parser.add_option("--ik", "--inputkey", dest="inkey", help="columns indices that define the cross-referencing key (input-side, defaults to refkey)", default=None)
parser.add_option("-u", "--uniq", dest="uniq", action="store_true", help="only output the unique cross-referenced payload entries", default=False)
parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="don't output every little detail", default=False)
(options, args) = parser.parse_args(args)

fail = False
Expand Down Expand Up @@ -60,13 +61,14 @@ def uniq_init(I, R, payload):
def uniq_add(xref, row, payload=Csv.Row([])):
    """Add the tab-joined payload fields to ``xref``.

    xref    -- collection exposing ``add()`` (presumably a set, so that
               duplicate payloads collapse -- confirm against uniq_init).
    row     -- the input row; accepted but not used here.
    payload -- iterable of strings to join; defaults to an empty Csv.Row,
               which yields the empty string.
    """
    joined = '\t'.join(payload)
    xref.add(joined)

def do_xref(ref, inp, outp, refk, ink, payload, uniq):
def do_xref(ref, inp, outp, refk, ink, payload, uniq, quiet):
R = Csv(ref)
I = Csv(inp)
out = outp=='stdout' and sys.stdout or file(outp, 'w')
blacklist = set(['seg', 'signal-peptide'])
ridx = R.make_index(refk, payload, blacklist)
print "Using (%s) as cross-referencing key"%(','.join(R.headers[refk]))
if not quiet:
print "Using (%s) as cross-referencing key"%(','.join(R.headers[refk]))
if uniq:
xref = uniq_init(I, R, payload)
add = uniq_add
Expand All @@ -82,21 +84,23 @@ def do_xref(ref, inp, outp, refk, ink, payload, uniq):
#xref.append('\t'.join(row).strip())
continue
if key not in ridx:
print "No match for (%s)"%(','.join(key))
if not quiet:
print "No match for (%s)"%(','.join(key))
nomatch.add(key)
add(xref, row)
#xref.append('\t'.join(row).strip())
continue
matches = ridx[row[ink]]
print len(matches), "match(es)"
if not quiet:
print len(matches), "match(es)"
for m in matches:
add(xref, row, m[payload])
add(xref, row, m)
#xref.append('\t'.join(row+m[payload]).strip())
for x in xref:
print >> out, x


if __name__=='__main__':
options, args = getopts(sys.argv[1:])
do_xref(options.ref, options.inp, options.outp, options.refkey, options.inkey, options.refcol, options.uniq)
do_xref(options.ref, options.inp, options.outp, options.refkey, options.inkey, options.refcol, options.uniq, options.quiet)

0 comments on commit 21decbc

Please sign in to comment.