Skip to content

Commit

Permalink
Merge pull request #517 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
ENH: major new drawing feature, support for creating sequence logos
  • Loading branch information
GavinHuttley committed Feb 1, 2020
2 parents 5a09fd7 + 8ceccd0 commit 4521109
Show file tree
Hide file tree
Showing 16 changed files with 11,916 additions and 5 deletions.
5 changes: 5 additions & 0 deletions doc/data/tbp.jaspar
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
>MA0108.2 TBP
A [ 61 16 352 3 354 268 360 222 155 56 83 82 82 68 77 ]
C [145 46 0 10 0 0 3 2 44 135 147 127 118 107 101 ]
G [152 18 2 2 5 0 20 44 157 150 128 128 128 139 140 ]
T [ 31 309 35 374 30 121 6 121 33 48 31 52 61 75 71 ]
2 changes: 2 additions & 0 deletions doc/data_file_links.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ All data files referred to in the documentation can be download by clicking on t

:download:`BRCA1-demo.fasta <data/BRCA1-demo.fasta>`

:download:`tbp.jaspar <data/tbp.jaspar>`

9,607 changes: 9,607 additions & 0 deletions doc/draw/draw-seqlogo.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions doc/draw/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Alignments
display-gaps-per-seq
draw-alignment-coevolution
draw-alignment-info-plot
draw-seqlogo


*****
Expand Down
30 changes: 30 additions & 0 deletions src/cogent3/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3459,6 +3459,36 @@ def coevolution(

return result

def seqlogo(self, width=700, height=100, wrap=None, vspace=0.005, colours=None):
"""returns Drawable sequence logo using mutual information
Parameters
----------
width, height : float
plot dimensions in pixels
wrap : int
number of alignment columns per row
vspace : float
vertical separation between rows, as a proportion of total plot
colours : dict
mapping of characters to colours. If note provided, defaults to
custom for everything ecept protein, which uses protein moltype
colours.
Notes
-----
Computes MI based on log2 and includes the gap state, so the maximum
possible value is -log2(1/num_states)
"""
assert 0 <= vspace <= 1, "vertical space must be in range 0, 1"
freqs = self.counts_per_pos(allow_gap=True).to_freq_array()
if colours is None and "protein" in self.moltype.label:
colours = self.moltype._colors

return freqs.logo(
width=width, height=height, wrap=wrap, vspace=vspace, colours=colours
)


def _one_length(seqs):
"""raises ValueError if seqs not all same length"""
Expand Down
75 changes: 74 additions & 1 deletion src/cogent3/core/profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy

from numpy import digitize, array
from numpy import array, digitize
from numpy.random import random
from numpy.testing import assert_allclose

Expand Down Expand Up @@ -371,6 +371,79 @@ def to_pssm(self, background=None):
background=background,
)

def logo(
self, height=400, width=800, wrap=None, ylim=None, vspace=0.05, colours=None
):
"""returns a sequence logo Drawable"""
from cogent3.draw.logo import get_mi_char_heights, get_logo
from cogent3.draw.drawable import get_domain

assert 0 <= vspace <= 1, f"{vspace} not in range 0-1"
if ylim is None:
ylim = -numpy.log2(1 / self.shape[1]) * 1.1

if wrap is None:
mit = get_mi_char_heights(self)
logo = get_logo(mit, height=height, width=width, ylim=ylim, colours=colours)
return logo

wrap = min(wrap, self.shape[0])
rows, remainder = divmod(self.shape[0], wrap)
num_rows = rows + 1 if remainder else rows

axnum = 1
logo = None
xlim_text = {
"showarrow": False,
"text": "Position",
"x": None,
"xanchor": "center",
"xref": None,
"y": 0,
"yshift": 2,
"yanchor": "bottom",
"yref": None,
}
for i in range(0, self.shape[0], wrap):
axis = "axis" if axnum == 1 else f"axis{axnum}"
ydomain = get_domain(num_rows, axnum - 1, is_y=True, space=vspace)
segment = self[i : i + wrap, :]
mit = get_mi_char_heights(segment)
sublogo = get_logo(
mit,
height=height,
width=width,
axnum=axnum,
ydomain=ydomain,
ylim=ylim,
colours=colours,
)
xtick_vals = [j for j in range(0, segment.shape[0], 20)]
xtick_text = [f"{i + j}" for j in range(0, segment.shape[0], 20)]
sublogo.layout[f"x{axis}"].showticklabels = False
sublogo.layout[f"x{axis}"].domain = [0, segment.shape[0] / wrap]

# place the row limit x-coord
xtext = xlim_text.copy()
xtext["text"] = f"{i + segment.shape[0]}"
xtext["x"] = segment.shape[0] + 1
xtext["xref"] = f"x{'' if axnum == 1 else axnum}"
xtext["yref"] = f"y{'' if axnum == 1 else axnum}"
sublogo.layout.annotations = [xtext]

if logo is None:
logo = sublogo
else:
logo.layout.shapes.extend(sublogo.layout.shapes)
logo.layout.annotations.extend(sublogo.layout.annotations)

logo.layout[f"x{axis}"] = sublogo.layout[f"x{axis}"]
logo.layout[f"y{axis}"] = sublogo.layout[f"y{axis}"]

axnum += 1

return logo


class PSSM(_MotifNumberArray):
"""position specific scoring matrix
Expand Down
3 changes: 2 additions & 1 deletion src/cogent3/draw/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__all__ = ["dendrogram", "dotplot", "drawable"]
__all__ = ["dendrogram", "dotplot", "drawable", "letter", "logo"]

__copyright__ = "Copyright 2007-2019, The Cogent Project"
__contributors__ = [
Expand All @@ -11,6 +11,7 @@
"Matthew Wakefield",
"Stephanie Wilson",
"Rahul Ghangas",
"Sheng Han Moses Koh",
]
__license__ = "BSD-3"
__version__ = "2019.12.6a"
Expand Down
2 changes: 1 addition & 1 deletion src/cogent3/draw/dendrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def support_text_coord(self, xshift, yshift, threshold=1, max_attr_length=4):

c = np.cos(textangle * (np.pi / 180))
s = np.sin(textangle * (np.pi / 180))
m = np.matrix([[c, s], [-s, c]])
m = np.array([[c, s], [-s, c]])
d = np.dot(m, [xshift, yshift])

new_xshift = float(d.T[0])
Expand Down
33 changes: 33 additions & 0 deletions src/cogent3/draw/drawable.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,39 @@
PLOTLY_RENDERER = os.environ.get("PLOTLY_RENDERER", None)


def get_domain(total, element, is_y, space=0.01):
"""returns evenly spaced domain for an element in a grid plot
Parameters
----------
total : int
the total number of elements on the axis
element : int
the element number to compute the domain for
is_y : bool
if True, this is for a y-coordinate domain. This is reversed
so the result is in cartesian, not array, coordinates
space : float
the separation between elements
"""
if total == 1:
return [0, 1]

if element > total - 1:
raise ValueError(f"{element} index too big for {total}")

per_element = 1 / total
space = min(space / 2, per_element / 10)
bounds = [per_element * i for i in range(total + 1)]
domains = [
(bounds[k] + space, bounds[k + 1] - space) for k in range(len(bounds) - 1)
]
if is_y:
element = total - element - 1

return domains[element]


def _show_(cls, renderer=None, **kwargs):
"""display figure
Expand Down

0 comments on commit 4521109

Please sign in to comment.