Skip to content

Commit

Permalink
Added the capability to read reverse reads via a second column in the …
Browse files Browse the repository at this point in the history
…input, as well as reverting the output to a format similar to the previous version
  • Loading branch information
jwcodee committed Jan 24, 2019
1 parent 693c8fa commit f98d240
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
22 changes: 19 additions & 3 deletions prince/match_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def combine_records(record1,record2):
else:
return itertools.chain(record1,record2)

def get_reads_records(filename):
def get_reads_records(filename, filename_rev = "" ):
'''
Retrieves records for the reads.
Inputs:
Expand All @@ -30,17 +30,33 @@ def get_reads_records(filename):
gzip_handle2 = None
# First, check whether filename is actually just a path
columns = filename.split('.')
if filename_rev != "":
columns_rev = filename.split('.')
if columns[-1] == 'gz':
try:
gzip_handle1 = gzip.open(filename,'rt')
record1 = SeqIO.parse(gzip_handle1,'fastq')
except:
raise IOError("Cannot open target file %s." % filename)
#assuming if first file is gz, second file is gz too
if filename_rev != "":
try:
gzip_handle2 = gzip.open(filename_rev,'rt')
record2 = SeqIO.parse(gzip_handle2,'fastq')
except:
raise IOError("Cannot open target file %s." % filename_rev)

elif columns[-1] == 'fastq' or columns[-1] == 'fq':
try:
record1 = SeqIO.parse(filename,'fastq')
except:
raise IOError("Can not open target file %s." % filename)
#assuming if first file is fastq or fq, second file is the same too
if filename_rev != "":
try:
record2 = SeqIO.parse(filename_rev,'fastq')
except:
raise IOError("Cannot open target file %s." % filename_rev)
# Otherwise, check all possible file extensions
else:
delimiters = ['','_']
Expand Down Expand Up @@ -86,12 +102,12 @@ def get_reads_records(filename):
return record1,record2,gzip_handle1,gzip_handle2


def compute_match_score(filename, template_obj, kmerLength, primers):
def compute_match_score(filename, filename_rev, template_obj, kmerLength, primers):
'''
Inputs:
- (str) data_prefix: the prefix of the NGS dataset paths
'''
record1, record2, gzip1, gzip2 = get_reads_records(filename)
record1, record2, gzip1, gzip2 = get_reads_records(filename, filename_rev)
#Run reads through Coarse Filtering to drastically reduce computation for Fine Filtering
reads = combine_records(record1,record2)

Expand Down
17 changes: 11 additions & 6 deletions prince/query_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,26 @@ def test_target(opts, template_obj, primers):
query = file.readline().strip("\n")
while query:
start_time = time.time()
targetFileName = query.split("/")[-1] #CHANGE
targetFileName = query.split("\t")[0].split("/")[-1]
target_forward = query.split("\t")[0]
try:
target_reverse = query.split("\t")[1]
except:
target_reverse = ""
print("\nQuerying %s" % targetFileName)

targetMatchScore = compute_match_score(query, template_obj, opts.k, primers)
targetMatchScore = compute_match_score(target_forward, target_reverse, template_obj, opts.k, primers)

data = get_data(opts.boost_output)
equations = get_equations(data)
predictions = []

# Write target predictions to text file
with open(opts.target_output, "a+") as f:
if os.path.getsize(opts.target_output) == 0:
f.write("Templates,")
f.write(",".join(template_obj["Names"]))
f.write("\n")
#if os.path.getsize(opts.target_output) == 0:
f.write("Templates,")
f.write(",".join(template_obj["Names"]))
f.write("\n")
f.write(targetFileName)
for t, ms in enumerate(targetMatchScore):
slope, intercept = equations[t]
Expand Down

0 comments on commit f98d240

Please sign in to comment.