forked from dmgroppe/Mass_Univariate_ERP_Toolbox
/
fdr_bky.m
executable file
·237 lines (218 loc) · 8.55 KB
/
fdr_bky.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
% fdr_bky() - Executes the "two-stage" Benjamini, Krieger, & Yekutieli (2006)
% procedure for controlling the false discovery rate (FDR) of a
% family of hypothesis tests. FDR is the expected proportion of
% rejected hypotheses that are mistakenly rejected (i.e., the null
% hypothesis is actually true for those tests). FDR is a
% somewhat less conservative/more powerful method for correcting
% for multiple comparisons than procedures like Bonferroni
% correction that provide strong control of the family-wise
% error rate (i.e., the probability that one or more null
% hypotheses are mistakenly rejected).
% The procedure implemented by this function is more powerful
% than the original Benjamini & Hochberg (1995) procedure when
% a considerable percentage of the hypotheses in the family are
% false. To the best of my knowledge, this procedure is only
% guaranteed to control FDR if the tests are independent.
% However, simulations suggest that it can control FDR even
% when the tests are positively correlated (Benjamini et al.,
% 2006).
%
% Usage:
% >> [h, crit_p]=fdr_bky(pvals,q,report);
%
% Required Input:
% pvals - A vector or matrix (two dimensions or more) containing the
% p-value of each individual test in a family of tests.
%
% Optional Inputs:
% q - The desired false discovery rate. {default: 0.05}
% report - ['yes' or 'no'] If 'yes', a brief summary of the FDR results is
% output to the MATLAB command line {default: 'no'}
%
%
% Outputs:
% h - A binary vector or matrix of the same size as the input "pvals."
% If the ith element of h is 1, then the test that produced the
% ith p-value in pvals is significant (i.e., the null hypothesis
% of the test is rejected).
% crit_p - All p-values less than or equal to crit_p are significant
% (i.e., their null hypotheses are rejected). If no p-values are
% significant, crit_p=0.
%
%
% References:
% Benjamini, Y., Krieger, A.M., & Yekutieli, D. (2006) Adaptive linear
% step-up procedures that control the false discovery rate. Biometrika.
% 93(3), 491-507.
%
% Benjamini, Y. & Hochberg, Y. (1995) Controlling the false discovery
% rate: A practical and powerful approach to multiple testing. Journal
% of the Royal Statistical Society, Series B (Methodological). 57(1),
% 289-300.
%
% Example:
% [dummy p_null]=ttest(randn(12,15)); %15 tests where the null hypothesis
% %is true
% [dummy p_effect]=ttest(randn(12,5)+1); %5 tests where the null
% %hypothesis is false
% [h crit_p]=fdr_bky([p_null p_effect],.05,'yes');
%
%
% For a review on false discovery rate control and other contemporary
% techniques for correcting for multiple comparisons see:
%
% Groppe, D.M., Urbach, T.P., & Kutas, M. (2011) Mass univariate analysis
% of event-related brain potentials/fields I: A critical tutorial review.
% Psychophysiology, 48(12) pp. 1711-1725, DOI: 10.1111/j.1469-8986.2011.01273.x
% http://www.cogsci.ucsd.edu/~dgroppe/PUBLICATIONS/mass_uni_preprint1.pdf
%
% Author:
% David M. Groppe
% Kutaslab
% Dept. of Cognitive Science
% University of California, San Diego
% March 25, 2010
function [h, crit_p]=fdr_bky(p_values,q,report)
% Two-stage Benjamini, Krieger, & Yekutieli (2006) FDR procedure.
%
% Inputs:
%   p_values - Vector or N-D array of p-values; every value must lie in [0,1].
%   q        - Desired false discovery rate {default: 0.05}
%   report   - 'yes' prints a short summary to the command line
%              {default: 'no'}
%
% Outputs:
%   h        - Logical array of the same size as p_values; true where the
%              corresponding p-value is <= crit_p.
%   crit_p   - Largest p-value that is rejected; 0 if nothing is rejected.
%
% Errors:
%   Raised when no p-values are supplied or when any p-value is <0 or >1.

if nargin<1
    error('You need to provide a vector or matrix of p-values.');
end
if any(p_values(:)<0)
    error('Some p-values are less than 0.');
elseif any(p_values(:)>1)
    error('Some p-values are greater than 1.');
end
if nargin<2
    q=.05;
end
if nargin<3
    report='no';
end

% Flatten whatever shape was supplied into a sorted row vector; fdr_bh
% expects its p-values in that form. (:).' also yields a proper 1x0 row
% for empty input.
p_sorted=sort(p_values(:).');
m=length(p_sorted); %number of tests

%STEP 1: Run the classic Benjamini-Hochberg linear step-up procedure (BH)
%with a slightly more conservative q value
q_prime=q/(1+q);
[hh, crit_p]=fdr_bh(p_sorted,q_prime,'pdep');
r1=sum(hh); %number of hypotheses rejected in stage 1

if r1==0
    %NO hypotheses rejected, stop here
    crit_p=0;
    %Return a logical mask (not p_values*0) so that h has the same type as
    %in every other branch and never contains NaN
    h=false(size(p_values));
elseif r1==m
    %ALL hypotheses rejected, stop here
    crit_p=p_sorted(end); %critical p-value is the biggest p-value
    h=p_values<=crit_p;
else
    %STEP 2: r1 serves as the estimated # of false hypotheses
    m0hat=m-r1; %estimated # of true hypotheses (nonzero since r1<m)
    %Repeat BH with q rescaled by m/m0hat
    q_star=q_prime*m/m0hat;
    [hh, crit_p]=fdr_bh(p_sorted,q_star,'pdep');
    h=p_values<=crit_p;
end

if strcmpi(report,'yes')
    n_sig=sum(hh); %hh comes from the last BH pass that was run
    if n_sig==1
        fprintf('Out of %d tests, %d is significant using a false discovery rate of %f.\n',m,n_sig,q);
    else
        fprintf('Out of %d tests, %d are significant using a false discovery rate of %f.\n',m,n_sig,q);
    end
    fprintf('FDR two-stage procedure used is guaranteed valid for independent tests.\n');
end
function [h, crit_p]=fdr_bh(pvals,q,method)
% fdr_bh() - Local helper that applies the Benjamini & Hochberg (1995)
%            linear step-up procedure for controlling the false discovery
%            rate (FDR) to a vector of p-values that has ALREADY been
%            sorted. FDR is the expected proportion of rejected hypotheses
%            that are mistakenly rejected (i.e., the null hypothesis is
%            actually true for those tests).
%
% Usage:
%  >> [h, crit_p]=fdr_bh(pvals,q,method);
%
% Required Inputs:
%   pvals - A ROW vector of p-values sorted in ascending order. The ith
%           element is compared against the ith step-up threshold, so
%           unsorted or non-row input is not supported by this helper.
%   q     - The desired false discovery rate.
%
% Optional Inputs:
%   method - ['pdep' or 'dep'] If 'pdep,' the original Benjamini & Hochberg
%            FDR procedure is used, which is guaranteed to be accurate if
%            the individual tests are independent or positively dependent
%            (e.g., positively correlated). If 'dep,' the FDR procedure
%            described in Benjamini & Yekutieli (2001) that is guaranteed
%            to be accurate for any test dependency structure (e.g.,
%            positively and/or negatively correlated tests) is used. 'dep'
%            is always appropriate to use but is less powerful than 'pdep.'
%            Matching is case-insensitive. {default: 'pdep'}
%
% Outputs:
%   h      - A logical row vector of the same size as "pvals." If the ith
%            element of h is true, then the test that produced the ith
%            p-value in pvals is significant (i.e., the null hypothesis
%            of the test is rejected).
%   crit_p - All p-values less than or equal to crit_p are significant
%            (i.e., their null hypotheses are rejected). If no p-values are
%            significant, crit_p=0.
%
% Errors:
%   Raised when method is neither 'pdep' nor 'dep'.
%
% References:
%   Benjamini, Y. & Hochberg, Y. (1995) Controlling the false discovery
%     rate: A practical and powerful approach to multiple testing. Journal
%     of the Royal Statistical Society, Series B (Methodological). 57(1),
%     289-300.
%
%   Benjamini, Y. & Yekutieli, D. (2001) The control of the false discovery
%     rate in multiple testing under dependency. The Annals of Statistics.
%     29(4), 1165-1188.
%
% Example:
%   [dummy p_null]=ttest(randn(12,15)); %15 tests where the null hypothesis
%                                       %is true
%   [dummy p_effect]=ttest(randn(12,5)+1); %5 tests where the null
%                                          %hypothesis is false
%   [h crit_p]=fdr_bh(sort([p_null p_effect]),.05,'pdep');
%
% Author:
% David M. Groppe
% Kutaslab
% Dept. of Cognitive Science
% University of California, San Diego
% March 24, 2010

if nargin<3
    method='pdep';
end

%Note: pvals is already sorted, and a row vector
m=length(pvals); %number of tests

if strcmpi(method,'pdep')
    %BH procedure for independence or positive dependence
    thresh=(1:m)*q/m;
elseif strcmpi(method,'dep')
    %BH procedure for any dependency structure: the thresholds are shrunk
    %by the harmonic sum 1 + 1/2 + ... + 1/m
    denom=m*sum(1./(1:m));
    thresh=(1:m)*q/denom;
else
    error('Argument ''method'' needs to be ''pdep'' or ''dep''.');
end

%Step-up rule: the critical p-value is the LARGEST sorted p-value that
%falls at or below its own threshold; everything at or below it is rejected
rej=pvals<=thresh;
max_id=find(rej,1,'last'); %find greatest significant pvalue
if isempty(max_id)
    crit_p=0;
    %Logical zeros (not pvals*0) so h has the same type as in the
    %rejection branch and never contains NaN
    h=false(size(pvals));
else
    crit_p=pvals(max_id);
    h=pvals<=crit_p;
end