Skip to content

Commit

Permalink
fixing parallel.sh default to merge, cleaning code
Browse files Browse the repository at this point in the history
  • Loading branch information
Panciera committed Mar 31, 2015
1 parent 2486665 commit a9fea0a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 8 deletions.
2 changes: 1 addition & 1 deletion parallel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mkdir -p $outdir
outfile=${bamfile//[\/\.]/_}.minimized.$depth
ngs="/home/AMED/michael.panciera/projects/ngs_mapper/ngs_mapper"
compiled=$outdir/compiled.${outfile}.bam;
MERGE=false
MERGE=true
let i=0
for ref in $refs;
do
Expand Down
8 changes: 1 addition & 7 deletions subsample_mindepth.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import re
import random

''' Python3 compatibility '''

''' Python3 compatibility '''
from past.builtins import map, xrange, filter

class CommonEqualityMixin(object):
Expand Down Expand Up @@ -88,7 +87,6 @@ def yield_greatest_overlaps(self, under_index, num_needed):
while matches < num_needed and candidate_sequences:
# could instead keep sequences in order sorted by overlap
if self.more_random:
#import ipdb; ipdb.set_trace()
next_seq = random.choice(candidate_sequences)
else:
next_seq = max(candidate_sequences, key=lambda seq: seq.overlap)
Expand Down Expand Up @@ -124,7 +122,6 @@ def get_depths(self, reads):
'''
#TODO:Needlessly creates a depth_array of equal size to self.depth_array. Also needlessly slow.
depths = np.zeros(len(self.depth_array))
#import ipdb; ipdb.set_trace()
for seq in reads:
''' Even if a sequence would overlap past the end of depth_array, the overhang is not included in depth_array.
The numpy arrays must be the same size in order to add them. '''
Expand All @@ -138,7 +135,6 @@ def make_seq_matrix(self, bamfile, regionstr):
:param str regionstr: reference sequence and length as listed in .bam header i.e. <refname>:1-1000
Parse a bam file using samtools, and store the alignments as a 2d matrix, where each row is a position in the reference sequence.
'''
#import ipdb; ipdb.set_trace()
all_alignments = get_alignments(bamfile, regionstr)
max_pos = max([seq.pos for seq in all_alignments])
max_overlap = max([seq.overlap for seq in all_alignments])
Expand All @@ -152,7 +148,6 @@ def minimize_depths(self):
Trim self.seq_matrix to minimize coverage overflow.
For each position in the reference sequence, pick reads until the minimum depth is met if possible.
'''
#import ipdb; ipdb.set_trace()
for pos, depth in enumerate(self.depth_array):
if depth < self.min_depth:
needed_depth = self.min_depth - depth
Expand Down Expand Up @@ -203,7 +198,6 @@ def main():
# region_str = args.regionstr.split(':')[0]
matrix = DepthMatrix(args.subsample, allow_orphans=args.count_orphans, more_random=args.more_random)
matrix.make_seq_matrix(args.bamfile, args.refseq)
#if args.more_random: import ipdb; ipdb.set_trace()
matrix.minimize_depths()
''' Flatten the matrix '''
sampled_seqs = flatten_and_filter_matrix(matrix.seq_matrix)
Expand Down

0 comments on commit a9fea0a

Please sign in to comment.