Skip to content
Browse files

added 'end messages' to several scripts

  • Loading branch information...
1 parent 433e860 commit ad301305110a745c53061689b9bd1a4119d682b6 @ctb ctb committed
Showing with 22 additions and 4 deletions.
  1. +8 −1 sandbox/split-pe.py
  2. +12 −3 sandbox/strip-and-split-for-assembly.py
  3. +2 −0 scripts/load-into-counting.py
View
9 sandbox/split-pe.py
@@ -6,12 +6,19 @@
fp1 = open(os.path.basename(sys.argv[1]) + '.1', 'w')
fp2 = open(os.path.basename(sys.argv[1]) + '.2', 'w')
+n1 = 0
+n2 = 0
for n, record in enumerate(screed.open(sys.argv[1])):
- if n % 10000 == 0:
+ if n % 100000 == 0:
print >>sys.stderr, '...', n
name = record.name
if name.endswith('/1'):
print >>fp1, '>%s\n%s' % (record.name, record.sequence,)
+ n1 += 1
elif name.endswith('/2'):
print >>fp2, '>%s\n%s' % (record.name, record.sequence,)
+ n2 += 1
+
+print >>sys.stderr, "DONE; split %d sequences (%d left, %d right)" % \
+ (n + 1, n1, n2)
View
15 sandbox/strip-and-split-for-assembly.py
@@ -23,6 +23,9 @@ def is_pair(name1, name2):
last_record = None
last_name = None
+n_pe = 0
+n_se = 0
+
print 'splitting pe/se sequences from %s to %s.{pe,se}' % (infile, outfile)
for n, record in enumerate(screed.open(sys.argv[1])):
if n % 100000 == 0 and n > 0:
@@ -35,8 +38,10 @@ def is_pair(name1, name2):
print >>paired_fp, '>%s\n%s' % (last_name, last_record['sequence'])
print >>paired_fp, '>%s\n%s' % (name, record['sequence'])
name, record = None, None
+ n_pe += 1
else:
print >>single_fp, '>%s\n%s' % (last_name, last_record['sequence'])
+ n_se += 1
last_name = name
last_record = record
@@ -46,17 +51,21 @@ def is_pair(name1, name2):
print >>paired_fp, '>%s\n%s' % (last_name, last_record['sequence'])
print >>paired_fp, '>%s\n%s' % (name, record['sequence'])
name, record = None, None
+ n_pe += 1
else:
print >>single_fp, '>%s\n%s' % (last_name, last_record['sequence'])
name, record = None, None
+ n_se += 1
if record:
print >>single_fp, '>%s\n%s' % (name, record['sequence'])
+ n_se += 1
single_fp.close()
paired_fp.close()
-### check, at the end, to see if it worked!
-paired_fp = open(outfile + '.pe')
-if not paired_fp.read(1):
+if n_pe == 0:
raise Exception("no paired reads!? check file formats...")
+
+print 'DONE; read %d sequences, %d pairs and %d singletons' % \
+ (n + 1, n_pe, n_se)
View
2 scripts/load-into-counting.py
@@ -78,5 +78,7 @@ def main():
print >>sys.stderr, "**"
sys.exit(-1)
+ print 'DONE.'
+
if __name__ == '__main__':
main()

0 comments on commit ad30130

Please sign in to comment.
Something went wrong with that request. Please try again.