-
Notifications
You must be signed in to change notification settings - Fork 90
/
color_schemes_protein.py
103 lines (92 loc) · 3.95 KB
/
color_schemes_protein.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
Biotite color schemes for protein sequences
===========================================
This script shows the same multiple protein sequence alignment
in the different color schemes available in *Biotite*.
- **rainbow** - Default color scheme in *Biotite*
- **clustalx** - Default color scheme of the *ClustalX* software
- Color schemes generated with the software *Gecos* [1]_:
- **flower** - Light color scheme, based on *BLOSUM62*
- **blossom** - Light color scheme with high contrast, based on
*BLOSUM62*, depicts symbol similarity worse than *flower*
- **spring** - Light color scheme, based on *BLOSUM62*,
with alanine fixed to gray
- **wither** - Dark color scheme, analogous to *blossom*
- **autumn** - Dark color scheme, analogous to *spring*
- **sunset** - Red-green color vision deficiency adapted color
scheme, based on *BLOSUM62*
- **ocean** - Blue shifted, light color scheme,
based on *BLOSUM62*
- Color schemes adapted from *JalView* [2]_:
- **zappo** - Color scheme that depicts physicochemical properties
- **taylor** - Color scheme invented by Willie Taylor
- **buried** - Color scheme depicting the *buried index*
- **hydrophobicity** - Color scheme depicting hydrophobicity
- **prophelix** - Color scheme depicting secondary structure
propensities
- **propstrand** - Color scheme depicting secondary structure
propensities
- **propturn** - Color scheme depicting secondary structure
propensities
.. [1] P Kunzmann, B Mayer,
"Substitution matrix based color schemes for sequence alignment
visualizations."
Unpublished
.. [2] M Clamp, J Cuff, SM Searle, GJ Barton,
"The Jalview Java alignment editor."
Bioinformatics, 20, 426-427 (2004).
"""
# Code source: Patrick Kunzmann
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez
# Build the example alignment
# (identical to the one used in the bacterial luciferase example)

# Combine both search criteria into a single Entrez query
gene_query = entrez.SimpleQuery("luxA", "Gene Name")
source_query = entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
query = gene_query & source_query
uids = entrez.search(query, db_name="protein")

# Fetch all hits as one FASTA file and parse it
fetched_file = entrez.fetch_single_file(
    uids, None, db_name="protein", ret_type="fasta"
)
fasta_file = fasta.FastaFile.read(fetched_file)
sequences = [
    seq.ProteinSequence(sequence_string)
    for sequence_string in fasta_file.values()
]

# Align the sequences using the standard protein substitution matrix
matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, _, _ = align.align_multiple(sequences, matrix)

# Order the alignment according to the guide tree
# and keep only the 220:300 slice of it for display
alignment = alignment[:, order][220:300]
# Collect the color scheme names, grouped by their origin
alphabet = seq.ProteinSequence.alphabet
default_schemes = ["rainbow", "clustalx"]
gecos_schemes = [
    "flower", "blossom", "spring", "wither", "autumn", "sunset", "ocean",
]
jalview_schemes = [
    "zappo", "taylor", "buried", "hydrophobicity",
    "prophelix", "propstrand", "propturn",
]
# The display order follows the listing in the description above
schemes = [*default_schemes, *gecos_schemes, *jalview_schemes]
count = len(schemes)

# Make sure this example shows every available amino acid color scheme:
# the names listed here must match the ones Biotite reports
all_schemes = graphics.list_color_scheme_names(alphabet)
assert set(all_schemes) == set(schemes)
# Visualize each scheme using the example alignment:
# The figure has one row per color scheme and two columns.
# The left column shows the first 40 alignment columns with
# 'color_symbols' disabled, the right column shows the remaining
# alignment columns with 'color_symbols' enabled.
fig = plt.figure(figsize=(8.0, count*2.0))
# The axes are placed directly on a (count x 2) grid via 'add_subplot()',
# hence no separate 'GridSpec' object is required
for i, name in enumerate(schemes):
    for j, color_symbols in enumerate([False, True]):
        # Subplot indices are 1-based and run row by row
        ax = fig.add_subplot(count, 2, 2*i + j + 1)
        if j == 0:
            # Label each row only once with the name of its scheme
            ax.set_ylabel(name)
            alignment_part = alignment[:40]
        else:
            alignment_part = alignment[40:]
        # Put the entire alignment part into a single line
        graphics.plot_alignment_type_based(
            ax, alignment_part, symbols_per_line=len(alignment_part),
            color_scheme=name, color_symbols=color_symbols, symbol_size=8
        )
fig.tight_layout()
# Remove the horizontal gap, so both halves of a row appear contiguous
fig.subplots_adjust(wspace=0)
plt.show()