-
Notifications
You must be signed in to change notification settings - Fork 269
removes dependence on cogent's DNA, LoadSeqs, Alignment, DenseAlignment #1497
Changes from all commits
cdde0ec
92da297
d11c1b6
f0281bb
5a6575d
0cdbc7d
0ebee85
fbec872
918d769
1f8c280
a1414dc
869ae26
7e2881f
0d76a72
13667fb
e4464f7
bc395e2
6abd439
cbbd6ef
0e005cd
09ebf5d
38cf234
e0b0ef2
2a74967
0cffae5
453ac49
a8d03ba
503e56f
7be747d
c822a7d
1e6fd16
c8c3ca6
ab5c144
df5801a
e86f5ab
bc2f968
f0b1161
5101383
fbbbe3e
1d4ca74
456aecd
370f893
c61a4a2
8b2df4e
c5ca2bc
9428451
31dd4ce
3b4cbe9
00d1093
8ed1e81
123d41e
3d4bd51
5860eda
3561d9b
75837fb
b2049ff
941ad00
7d18735
6805809
47af8e3
2643343
b76fd0c
b4c1015
303d70f
5044514
f60452e
4c24f84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,24 +25,24 @@ | |
from os import remove | ||
from numpy import median | ||
|
||
from cogent import LoadSeqs, DNA | ||
from cogent.core.alignment import DenseAlignment, SequenceCollection, Alignment | ||
from cogent.core.sequence import DnaSequence as Dna | ||
from cogent.parse.rfam import MinimalRfamParser, ChangedSequence | ||
|
||
import brokit | ||
from brokit.infernal import cmalign_from_alignment | ||
import brokit.clustalw | ||
import brokit.muscle_v38 | ||
import brokit.mafft | ||
|
||
from cogent import DNA as DNA_cogent | ||
from cogent.parse.rfam import MinimalRfamParser, ChangedSequence | ||
from skbio.app.util import ApplicationNotFoundError | ||
from skbio.core.exception import RecordError | ||
from skbio.parse.sequences import parse_fasta | ||
|
||
from qiime.util import (FunctionWithParams, | ||
get_qiime_temp_dir) | ||
|
||
from skbio.core.alignment import SequenceCollection, Alignment | ||
from skbio.core.sequence import DNASequence | ||
from skbio.parse.sequences import parse_fasta | ||
|
||
# Load PyNAST if it's available. If it's not, skip it but set up | ||
# to raise errors if the user tries to use it. | ||
|
@@ -115,7 +115,7 @@ def getResult(self, seq_path): | |
seqs = self.getData(seq_path) | ||
params = dict( | ||
[(k, v) for (k, v) in self.Params.items() if k.startswith('-')]) | ||
result = module.align_unaligned_seqs(seqs, moltype=DNA, params=params) | ||
result = module.align_unaligned_seqs(seqs, moltype=DNA_cogent, params=params) | ||
return result | ||
|
||
def __call__(self, result_path=None, log_path=None, *args, **kwargs): | ||
|
@@ -131,7 +131,7 @@ def __init__(self, params): | |
"""Return new InfernalAligner object with specified params. | ||
""" | ||
_params = { | ||
'moltype': DNA, | ||
'moltype': DNA_cogent, | ||
'Application': 'Infernal', | ||
} | ||
_params.update(params) | ||
|
@@ -156,9 +156,10 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
moltype = self.Params['moltype'] | ||
|
||
# Need to make separate mapping for unaligned sequences | ||
unaligned = SequenceCollection(candidate_sequences, MolType=moltype) | ||
int_map, int_keys = unaligned.getIntMap(prefix='unaligned_') | ||
int_map = SequenceCollection(int_map, MolType=moltype) | ||
unaligned = SequenceCollection.from_fasta_records( | ||
candidate_sequences.iteritems(), DNASequence) | ||
mapped_seqs, new_to_old_ids = unaligned.int_map(prefix='unaligned_') | ||
mapped_seq_tuples = [(k, str(v)) for k,v in mapped_seqs.iteritems()] | ||
|
||
# Turn on --gapthresh option in cmbuild to force alignment to full | ||
# model | ||
|
@@ -174,7 +175,6 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
# are fragments. | ||
# Also turn on --gapthresh to use same gapthresh as was used to build | ||
# model | ||
|
||
if cmalign_params is None: | ||
cmalign_params = {} | ||
cmalign_params.update({'--sub': True, '--gapthresh': 1.0}) | ||
|
@@ -186,20 +186,23 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
# Align sequences to alignment including alignment gaps. | ||
aligned, struct_string = cmalign_from_alignment(aln=template_alignment, | ||
structure_string=struct, | ||
seqs=int_map, | ||
seqs=mapped_seq_tuples, | ||
moltype=moltype, | ||
include_aln=True, | ||
params=cmalign_params, | ||
cmbuild_params=cmbuild_params) | ||
|
||
# Pull out original sequences from full alignment. | ||
infernal_aligned = {} | ||
infernal_aligned = [] | ||
# Get a dict of the identifiers to sequences (note that this is a | ||
# cogent alignment object, hence the call to NamedSeqs) | ||
aligned_dict = aligned.NamedSeqs | ||
for key in int_map.Names: | ||
infernal_aligned[int_keys.get(key, key)] = aligned_dict[key] | ||
for n, o in new_to_old_ids.iteritems(): | ||
aligned_seq = aligned_dict[n] | ||
infernal_aligned.append((o, aligned_seq)) | ||
|
||
# Create an Alignment object from alignment dict | ||
infernal_aligned = Alignment(infernal_aligned, MolType=moltype) | ||
infernal_aligned = Alignment.from_fasta_records(infernal_aligned, DNASequence) | ||
|
||
if log_path is not None: | ||
log_file = open(log_path, 'w') | ||
|
@@ -208,7 +211,7 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
|
||
if result_path is not None: | ||
result_file = open(result_path, 'w') | ||
result_file.write(infernal_aligned.toFasta()) | ||
result_file.write(infernal_aligned.to_fasta()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may be outside the scope of this pull request: Could There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that's a good suggestion, but outside the scope of this PR. Could you add to skbio's #194? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
result_file.close() | ||
return None | ||
else: | ||
|
@@ -248,12 +251,8 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
for seq_id, seq in parse_fasta(open(template_alignment_fp)): | ||
# replace '.' characters with '-' characters | ||
template_alignment.append((seq_id, seq.replace('.', '-').upper())) | ||
try: | ||
template_alignment = LoadSeqs(data=template_alignment, moltype=DNA, | ||
aligned=DenseAlignment) | ||
except KeyError as e: | ||
raise KeyError('Only ACGT-. characters can be contained in template alignments.' + | ||
' The offending character was: %s' % e) | ||
template_alignment = Alignment.from_fasta_records( | ||
template_alignment, DNASequence, validate=True) | ||
|
||
# initialize_logger | ||
logger = NastLogger(log_path) | ||
|
@@ -273,25 +272,28 @@ def __call__(self, seq_path, result_path=None, log_path=None, | |
|
||
logger.record(str(self)) | ||
|
||
for i, seq in enumerate(pynast_failed): | ||
skb_seq = DNASequence(str(seq), identifier=seq.Name) | ||
pynast_failed[i] = skb_seq | ||
pynast_failed = SequenceCollection(pynast_failed) | ||
|
||
for i, seq in enumerate(pynast_aligned): | ||
skb_seq = DNASequence(str(seq), identifier=seq.Name) | ||
pynast_aligned[i] = skb_seq | ||
pynast_aligned = Alignment(pynast_aligned) | ||
|
||
if failure_path is not None: | ||
fail_file = open(failure_path, 'w') | ||
for seq in pynast_failed: | ||
fail_file.write(seq.toFasta()) | ||
fail_file.write('\n') | ||
fail_file.write(pynast_failed.to_fasta()) | ||
fail_file.close() | ||
|
||
if result_path is not None: | ||
result_file = open(result_path, 'w') | ||
for seq in pynast_aligned: | ||
result_file.write(seq.toFasta()) | ||
result_file.write('\n') | ||
result_file.write(pynast_aligned.to_fasta()) | ||
result_file.close() | ||
return None | ||
else: | ||
try: | ||
return LoadSeqs(data=pynast_aligned, aligned=DenseAlignment) | ||
except ValueError: | ||
return {} | ||
return pynast_aligned | ||
|
||
|
||
def compute_min_alignment_length(seqs_f, fraction=0.75): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should these dependencies also be removed from qiime-deploy? If so, can you please create issues on the qiime-deploy-conf tracker?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done: #109