This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
/
plot_rank_abundance_graph.py
137 lines (113 loc) · 4.49 KB
/
plot_rank_abundance_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
# File created on 17 Aug 2010
from __future__ import division
__author__ = "Jens Reeder"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jens Reeder"]
__license__ = "GPL"
__version__ = "1.5.0"
__maintainer__ = "Justin Kuczynski"
__email__ = "justinak@gmail.com"
__status__ = "Release"
from matplotlib import use
use('Agg',warn=False)
from numpy import arange, array
from itertools import cycle
from matplotlib.pyplot import plot, gca, ylim, xlim, show, legend, \
savefig
from os.path import join
from qiime.colors import data_color_order,data_colors
from biom.table import UnknownID
def make_sorted_frequencies(counts, absolute=False):
"""transform and sort a vector of count.
counts: a column of an OTU table
absolute: if True return absolute values instead of frequencies.
"""
c = counts
c.sort()
c = c[c.nonzero()]
c = c[::-1]
if absolute:
return c
else:
f = c/float(c.sum())
return f
def plot_rank_abundance_graph(otu_count_vector, color='red', absolute=False, label=None):
"""Plots rank-abundance curve.
otu_count_vector: a vector of otu counts for a single sample
color: color of the series to plot
absolute: if True plot absolute counts instead of freqs
label: text for the legend of this series
"""
f= make_sorted_frequencies(otu_count_vector, absolute)
x = arange(1,len(f)+1)
plot(x, f, color=color, alpha= 0.8, label=label)
ax = gca()
return ax
def plot_rank_abundance_graphs(sample_names, otu_table,
output_dir, file_type='pdf',
absolute_counts=False,
x_linear_scale=False,
y_linear_scale=False,
no_legend=False,
log_fh=None):
"""plot rank-abundance curves for sample specified in sample_name.
sample_names: comma separated string of sample names
otu_table_fh: open file handle to otu table
output_dir: existing directory to which files are written
file_type: valid matplotlib file type
x_linear_scale: if True draw x axis in linear scale, otherwise use log
y_linear_scale: if True draw y axis in linear scale, otherwise use log
no_legend: if True don't draw legend
log_fh: open file handle to log file, if not None used to log
"""
#figure out which samples to draw
if sample_names=='*':
user_sample_names = otu_table.SampleIds
else:
user_sample_names = sample_names.split(',')
if len(user_sample_names)<1:
raise ValueError, "sample IDs must be comma separated list of "\
+"sample names - found %s" % sample_names
# do the actual drawing
ax=None
for sample_name,color in zip(user_sample_names, cycle(data_color_order)):
color=data_colors[color].toHex()
try:
otu_count_vector = otu_table.sampleData(sample_name)
except UnknownID:
if log_fh:
log_fh.write("UnknownID: Sample name %s not in OTU table - skipping." % sample_name)
continue
ax = plot_rank_abundance_graph(otu_count_vector,
color=color,
absolute=absolute_counts,
label=sample_name)
ax.set_label(sample_name)
if ax==None:
#ax should be defined if at least one series has been drawn
raise ValueError("No data series drawn. Check your OTU table and sample names")
#settings for all series
ax.grid()
ax.set_xlabel('Species rank')
ax.set_ylabel('Relative abundance')
if not x_linear_scale:
ax.set_xscale('log')
if not y_linear_scale:
ax.set_yscale('log')
if not no_legend:
legend()
#build output fp, if less than MAX_SAMPLES_... append the samples names
output_fp = output_dir
MAX_SAMPLES_TO_SHOW_IN_FILENAME = 6
rows_for_fname=[]
for i,nam in enumerate(otu_table.SampleIds):
for j,sel_name in enumerate(user_sample_names):
if sel_name==nam:
rows_for_fname.append(str(i))
if len(rows_for_fname) < MAX_SAMPLES_TO_SHOW_IN_FILENAME:
output_fp=join(output_fp,"rank_abundance_cols_"+'_'.join(rows_for_fname))
else:
output_fp=join(output_fp,"rank_abundance_cols_"+'_'.join(rows_for_fname))
output_fp += ".%s" % file_type
savefig(output_fp)