/
PvalueAdjust.fs
251 lines (211 loc) · 11.3 KB
/
PvalueAdjust.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
namespace FSharp.Stats.Testing
open FSharp.Stats
/// This module contains functions to adjust for multiple testing errors in statistical tests.
module MultipleTesting =
/// Benjamini-Hochberg Correction (BH)
/// 'projection' should return a tuple of any identifier and the pValues as float, when applied to 'rawP'
/// This function applies the Benjamini-Hochberg multiple testing correcture and returns all False Discovery Rates to which the given p-values are still
/// significant.
/// Note: corrected pValues are not sorted in original order!
let inline benjaminiHochbergFDRBy (projection:'a -> 'b*float) (rawP:seq<'a>) =
let pVals = rawP |> (Array.ofSeq >> Array.map projection)
// recursive function calculating cumulative minimum
let rec cummin (pValues:List<_*float>) (nrPvalues:int) (i:int) (min:float) (acc: ('b*float) list) =
match pValues with
| [] ->
acc |> List.rev
| x::rest when System.Double.IsNaN(snd(x)) ->
cummin rest nrPvalues (i) min ((fst(x),nan)::acc)
| x::rest ->
let prd = (float(nrPvalues)/float(nrPvalues-i))*snd(x)
let prd = if(prd > 1.0) then 1.0 else prd
let value = if(prd<=min) then prd else min
cummin rest nrPvalues (i+1) value ((fst(x),value) :: acc)
let rawpListMinusNan =
pVals
|> Seq.filter (fun (_,x) -> not (System.Double.IsNaN x))
|> Seq.toList
let rawpListNan =
pVals
|> Seq.filter (fun (_,x) -> (System.Double.IsNaN x))
|> Seq.toList
let npval = Seq.length (rawpListMinusNan)
let sortedRawp =
rawpListMinusNan
|> List.sortWith(fun (_,x) (_,y) -> if (x) > (y) then -1 else 1)
let adjp = cummin sortedRawp npval 0 System.Double.PositiveInfinity []
adjp @ rawpListNan
/// Benjamini-Hochberg Correction (BH)
/// This function applies the Benjamini-Hochberg multiple testing correcture and returns all False Discovery Rates to which the given p-values are still
/// significant.
let benjaminiHochbergFDR (rawPValues:seq<float>) =
let pValsIndexed =
rawPValues
|> Seq.indexed
benjaminiHochbergFDRBy id pValsIndexed
|> List.sortBy fst
|> List.map snd
|> Seq.ofList
// John D. Storey
// http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/R/library/qvalue/html/qvalue.html
// http://qvalue.princeton.edu/
/// Estimate the q-values for a given set of p-values. The q-value of a test measures the proportion of false positives incurred (called the false discovery rate) when that particular test is called significant.
module Qvalues =
//https://genomicsclass.github.io/book/pages/multiple_testing.html <- differt method to calculate pi0, than shown below
/// Estimates pi0 from given p-Values by Storeys bootstrap .
/// pi0 is the value to which, one can find certain values of the distribution at random.
/// If this returns 0. lambda needs to be smaller.
/// Math Definition for lambda = [0,1) . Contains values from 0 to 1, but not 1.
let pi0BootstrapWithLambda (lambda:float[]) (pValues:float[]) =
// checks how many pVals are higher than a certain lambdaVal 'a' and gives those a score of 1 and otherwise one of 0.
// Then takes avg of those numbers and divides the avg by (1 - lambda 'a').
let pi0 = Array.init lambda.Length ( fun i ->
let tmp =
pValues
|> Array.averageBy (fun v -> if (v >= lambda.[i]) then 1. else 0. )
tmp / (1. - lambda.[i])
)
let minpi0 = Array.min(pi0)
let rnd = System.Random()
// Takes random values from the pVal [] and creates a new arr of the same length of those random pVals.
// Then applies system of 'pi0' from above and uses the new created pi0 bootstrap [] to weight previous
// iterations of this function (seen in the 'mse' of this scope).
// Repeated 100 times.
let rec floop (counter:int) (result:float[]) =
if counter > 0 then
let pboot = Array.sampleWithReplacement rnd pValues pValues.Length
let pi0boot = Array.init lambda.Length ( fun i ->
let tmp =
pboot
|> Array.averageBy (fun v -> if (v >= lambda.[i]) then 1. else 0. )
tmp / (1. - lambda.[i])
)
let mse = Array.map2 (fun m p -> m + ((p - minpi0)**2.0) ) result pi0boot
floop (counter - 1) mse
else
result
let mse = floop (100) (Array.zeroCreate (lambda.Length))
// returns the value of the original pi0 arr, at the position of the smallest value found in the mse arr after 100 floop iterations.
let tmp = pi0.[(mse |> Array.findIndex(fun v -> v = (Array.min mse)))]
min (tmp) (1.0)
/// Estimates pi0 from given p-Values by Storeys bootstrap method using default lambda's
/// pi0 is the value to which, one can find certain values of the distribution at random.
/// If this returns 0. default lambda needs to be smaller. One should use 'pi0BootstrapWithLambda' in this case.
let pi0Bootstrap (pValues: float []) =
pi0BootstrapWithLambda [|0.0 ..0.05..0.9|] pValues
// let pValueOfQValue (qvalues:float[]) =
// // estimate of pi0 used to create the q-value that's the maximum q-value (or very, very close to it)
// let pi0 = Array.max qvalues
// // m0, the estimated number of true nulls
// let m0 = float qvalues.Length * pi0
// qvalues
// |> Statistics.Descriptive.Rank.breakByMean
// |> Seq.sortBy (fun r -> r.Position)
// // multiply each q-value by the proportion of true nulls expected to be under it (the inverse of how you get there from the p-value)
// |> Seq.map (fun r -> r.Value * r.RankIndex / m0)
// |> Array.ofSeq
//let private bind (arr:float[]) =
// let arr' = Array.copy arr
// for i=1 to arr'.Length-1 do
// if arr'.[i] < arr'.[i-1] then
// arr'.[i] <- arr'.[i-1]
// arr'
/// When used for QValues.ofPValues(Robust) it iterates through the pValues in ascending order and compares their new QValues.
/// To do this it compares the QValues in the ascending order of the PValues (QValues are sorted by their related PValues so that the first QValue is the one which is related to the previously smalles PValue).
/// Should the QValue related to the second PValue (QValue.[1]) be smaller the the QValue related to the smallest PValue (QValue.[0]) the later QValue is
/// replaced by the first QValue (QValue.[1] <- (QValue.[0])). Should all following QValues be smaller than that QVal, all will be replaced by it.
let private bindBy (objArr:float []) (arr:float[]) =
let arr' = Array.copy arr
let objArr' = Array.copy objArr
let index = Array.init arr.Length id
System.Array.Sort(objArr',index)
for i=1 to arr'.Length-1 do
if arr'.[index.[i]] < arr'.[index.[i-1]] then
arr'.[index.[i]] <- arr'.[index.[i-1]]
arr'
/// Calculates q-values from given p-values and returns an array of qValues in the same order.
/// 'pi0' can be calculated with 'pi0Bootstrap' or 'pi0BootstrapWithLambda'.
/// See Storey JD (2002) JRSS-B 64: 479-498.
let ofPValuesRobust (pi0:float) (pValues: float []) =
let m = float pValues.Length
let m0 = m * pi0
pValues
// replaces all values with ints ordered by their size. The smallest value will get 1,
// while the largest value of the arr will get arr.Length as new rank
|> Rank.rankFirst
|> Array.map float
|> Array.mapi (fun i r ->
let qVal =
let pVal = pValues.[i]
pVal * m0 / (r * (1. - (1. - pVal)**m))
min qVal 1.
)
|> bindBy pValues
/// Calculates q-values from given p-values and returns an array of qValues in the same order.
/// 'pi0' can be calculated with 'pi0Bootstrap' or 'pi0BootstrapWithLambda'.
let ofPValues (pi0:float) (pValues:float[]) =
let m0 = float pValues.Length * pi0
pValues
// replaces all values with ints ordered by their size. The smallest value will get 1,
// while the largest value of the arr will get arr.Length as new rank
|> Rank.rankFirst
|> Array.map float
|> Array.mapi (fun i r ->
let qVal =
let pVal = pValues.[i]
pVal / r * m0
min qVal 1.
)
|> bindBy pValues
// let inline checkNonNull argName arg =
// match box arg with
// | null -> nullArg argName
// | _ -> ()
//
//
// let pairwiseWithLast (source: seq<'T>) =
// checkNonNull "source" source
// seq { use ie = source.GetEnumerator()
// if ie.MoveNext() then
// let iref = ref ie.Current
// while ie.MoveNext() do
// let j = ie.Current
// yield (!iref, j)
// iref := j
// yield (!iref,!iref)}
//
//
// let qValueOfPValue' (pi0:float) (pvalues:float[]) =
// let m0 = float pvalues.Length * pi0
// pvalues
// |> Statistics.Descriptive.Rank.breakByMean
// |> Seq.map (fun r -> let v = r.Value / r.RankIndex * m0
// {r with Value = v}
// )
// |> pairwiseWithLast
// |> Seq.map (fun (f,s) -> let v = min (min f.Value s.Value) 1.
// {f with Value = v}
// )
// |> Seq.sortBy (fun r -> r.Position)
// |> Seq.map (fun r -> r.Value)
// |> Array.ofSeq
//
//
//
//
// //// Example
// //let pvalues = Seq.fromFile "D:\\output.txt" |> Seq.map float |> Seq.toArray
// //
// //let pi0 = pi0_Bootstrap [|0.0 ..0.05..0.9|] pvalues
// //
// //let qvalues = qValueOfPValue pi0 pvalues
// //let qvalues' = qValueOfPValue' pi0 pvalues
// //
// //
// //let bw = Statistics.Fitting.Bandwidth.nrd0 pvalues
// //let histo = Descriptive.Histogram.create (bw/2.) pvalues |> Descriptive.EmpiricalPMF.ofHistogram |> Descriptive.EmpiricalPMF.getZip |> Chart.Point |> Chart.ShowChart
// //
// //
// //
// //Chart.Point(Seq.zip pvalues qvalues ) |> Chart.ShowChart
// //