Skip to content

Commit

Permalink
Merge pull request #440 from GavinHuttley/master
Browse files Browse the repository at this point in the history
Updates to docs
  • Loading branch information
GavinHuttley committed Dec 6, 2019
2 parents 5d87551 + 8834c36 commit afb9b7f
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 102 deletions.
208 changes: 125 additions & 83 deletions src/cogent3/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,22 +1136,22 @@ def __add__(self, other):
def add_seqs(self, other, before_name=None, after_name=None):
"""Returns new object of class self with sequences from other added.
By default the sequence is appended to the end of the alignment,
this can be changed by using either before_name or after_name arguments.
Parameters
----------
other
same class as self or coercible to that class
before_name
str
before_name : str
name of the existing sequence before which the new sequences are inserted
after_name
str
after_name : str
name of the existing sequence after which the new sequences are inserted
Notes
-----
If both before_name and after_name are specified, the seqs will be
inserted using before_name.
By default the sequence is appended to the end of the alignment,
this can be changed by using either before_name or after_name arguments.
"""
assert not isinstance(other, str), (
"Must provide a series of seqs " + "or an alignment"
Expand Down Expand Up @@ -1214,6 +1214,9 @@ def write(self, filename=None, format=None, **kwargs):
format
format of the sequence file
Notes
-----
If format is None, will attempt to infer format from the filename
suffix.
"""
Expand Down Expand Up @@ -1243,7 +1246,8 @@ def __len__(self):
return self.seq_len

def get_translation(self, gc=None, incomplete_ok=False, **kwargs):
"""
"""translate from nucleic acid to protein
Parameters
----------
gc
Expand Down Expand Up @@ -1405,19 +1409,21 @@ def counts_per_seq(
):
"""returns dict of counts of motifs per sequence
only non-overlapping motifs are counted.
Parameters
----------
motif_length
number of characters per tuple.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
Parameters
----------
motif_length
number of characters per tuple.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
Notes
-----
"""
only non-overlapping motifs are counted
"""
# this is overridden for Alignments, so just rely on the sequence counts
# method
counts = []
Expand Down Expand Up @@ -1446,21 +1452,23 @@ def counts(
):
"""returns dict of counts of motifs
only non-overlapping motifs are counted.
Parameters
----------
motif_length
number of elements per character.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
exclude_unobserved
if True, unobserved motif combinations are excluded.
Parameters
----------
motif_length
number of elements per character.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
exclude_unobserved
if True, unobserved motif combinations are excluded.
Notes
-----
"""
only non-overlapping motifs are counted
"""
per_seq = self.counts_per_seq(
motif_length=motif_length,
include_ambiguity=include_ambiguity,
Expand Down Expand Up @@ -1492,6 +1500,10 @@ def get_motif_probs(
allow_gap
allow gap motif
Notes
-----
only non-overlapping motifs are counted
"""
if alphabet is None:
alphabet = self.moltype.alphabet
Expand Down Expand Up @@ -1763,6 +1775,7 @@ def dotplot(

def rename_seqs(self, renamer):
"""returns new instance with sequences renamed
Parameters
----------
renamer : callable
Expand Down Expand Up @@ -1793,8 +1806,8 @@ def rename_seqs(self, renamer):
def apply_pssm(
self, pssm=None, path=None, background=None, pseudocount=0, names=None, ui=None
):
"""
scores sequences using the specified pssm
"""scores sequences using the specified pssm
Parameters
----------
pssm : profile.PSSM
Expand All @@ -1806,6 +1819,7 @@ def apply_pssm(
adjustment for zero in matrix
names
returns only scores for these sequences and in the name order
Returns
-------
numpy array of log2 based scores at every position
Expand Down Expand Up @@ -1871,30 +1885,35 @@ def set_repr_policy(self, num_seqs=None, num_pos=None):
assert isinstance(num_pos, int), "num_pos is not an integer"
self._repr_policy["num_pos"] = num_pos

def probs_per_seq(self, motif_length=1,
include_ambiguity=False,
allow_gap=False,
exclude_unobserved=False,
alert=False):
def probs_per_seq(
self,
motif_length=1,
include_ambiguity=False,
allow_gap=False,
exclude_unobserved=False,
alert=False,
):
"""return MotifFreqsArray per sequence"""

counts = self.counts_per_seq(
motif_length=motif_length,
include_ambiguity=include_ambiguity,
allow_gap=allow_gap,
exclude_unobserved=exclude_unobserved
exclude_unobserved=exclude_unobserved,
)
if counts is None:
return None

return counts.to_freq_array()

def entropy_per_seq(self,
motif_length=1,
include_ambiguity=False,
allow_gap=False,
exclude_unobserved=True,
alert=False):
def entropy_per_seq(
self,
motif_length=1,
include_ambiguity=False,
allow_gap=False,
exclude_unobserved=True,
alert=False,
):
"""returns the Shannon entropy per sequence
Parameters
Expand All @@ -1914,9 +1933,13 @@ def entropy_per_seq(self,
For motif_length > 1, it's advisable to specify exclude_unobserved=True,
this avoids unnecessary calculations.
"""
probs = self.probs_per_seq(motif_length=motif_length, include_ambiguity=include_ambiguity,
allow_gap=allow_gap,
exclude_unobserved=exclude_unobserved, alert=alert)
probs = self.probs_per_seq(
motif_length=motif_length,
include_ambiguity=include_ambiguity,
allow_gap=allow_gap,
exclude_unobserved=exclude_unobserved,
alert=alert,
)
if probs is None:
return None

Expand Down Expand Up @@ -2307,7 +2330,20 @@ def probs_per_seq(
exclude_unobserved=False,
alert=False,
):
"""return MotifFreqsArray per sequence"""
"""return MotifFreqsArray per sequence
Parameters
----------
motif_length
number of characters per tuple.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gap
if True, motifs containing a gap character are included.
exclude_unobserved
if True, unobserved motif combinations are excluded.
"""
counts = self.counts_per_seq(
motif_length=motif_length,
include_ambiguity=include_ambiguity,
Expand All @@ -2329,23 +2365,23 @@ def entropy_per_seq(
):
"""returns the Shannon entropy per sequence
Parameters
----------
motif_length
number of characters per tuple.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gap
if True, motifs containing a gap character are included.
exclude_unobserved
if True, unobserved motif combinations are excluded.
Parameters
----------
motif_length
number of characters per tuple.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gap
if True, motifs containing a gap character are included.
exclude_unobserved
if True, unobserved motif combinations are excluded.
Notes
-----
For motif_length > 1, it's advisable to specify exclude_unobserved=True,
this avoids unnecessary calculations.
"""
Notes
-----
For motif_length > 1, it's advisable to specify exclude_unobserved=True,
this avoids unnecessary calculations.
"""
probs = self.probs_per_seq(
motif_length=motif_length,
include_ambiguity=include_ambiguity,
Expand Down Expand Up @@ -2860,6 +2896,9 @@ def counts_per_pos(
):
"""return DictArray of counts per position
Parameters
----------
alert
warns if motif_length > 1 and alignment trimmed to produce
motif columns
Expand Down Expand Up @@ -2897,22 +2936,21 @@ def counts_per_seq(
):
"""returns dict of counts of non-overlapping motifs per sequence
Parameters
----------
motif_length
number of elements per character.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
exclude_unobserved
if False, all canonical states included
alert
warns if motif_length > 1 and alignment trimmed to produce
motif columns
"""
Parameters
----------
motif_length
number of elements per character.
include_ambiguity
if True, motifs containing ambiguous characters
from the seq moltype are included. No expansion of those is attempted.
allow_gaps
if True, motifs containing a gap character are included.
exclude_unobserved
if False, all canonical states included
alert
warns if motif_length > 1 and alignment trimmed to produce
motif columns
"""
length = (len(self) // motif_length) * motif_length
if alert and len(self) != length:
warnings.warn(f"trimmed {len(self) - length}", UserWarning)
Expand Down Expand Up @@ -3006,6 +3044,7 @@ def to_type(self, array_align=False, moltype=None, alphabet=None):

def distance_matrix(self, calc="percent", show_progress=False, drop_invalid=False):
"""Returns pairwise distances between sequences.
Parameters
----------
calc : str
Expand Down Expand Up @@ -3790,6 +3829,7 @@ def sample(

def filtered(self, predicate, motif_length=1, drop_remainder=True, **kwargs):
"""The alignment positions where predicate(column) is true.
Parameters
----------
predicate : callable
Expand Down Expand Up @@ -3969,7 +4009,9 @@ def add_from_ref_aln(self, ref_aln, before_name=None, after_name=None):
If both before_name and after_name are specified seqs will be
inserted using before_name.
Example:
Examples
--------
Aln1:
-AC-DEFGHI (name: seq1)
XXXXXX--XX (name: seq2)
Expand Down
5 changes: 3 additions & 2 deletions src/cogent3/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,15 +1138,16 @@ def trim_stop_codon(self, gc=None, allow_partial=False):
return self.__class__(codons, name=self.name, info=self.info)

def get_translation(self, gc=None, incomplete_ok=False):
"""
translation to amino acid sequence
"""translate to amino acid sequence
Parameters
----------
gc
name or ID of genetic code
incomplete_ok : bool
if True, codons that are a mix of nucleotides and gaps are converted
to '?'. If False, a ValueError is raised for such codons.
Returns
-------
sequence of PROTEIN moltype
Expand Down

0 comments on commit afb9b7f

Please sign in to comment.