Skip to content

Commit

Permalink
Merge pull request #237 from zieglerSe/SAM-Rework
Browse files Browse the repository at this point in the history
  • Loading branch information
bvenn committed Jan 25, 2023
2 parents b0b766b + a0cd3ec commit f73b05f
Show file tree
Hide file tree
Showing 9 changed files with 1,141 additions and 15,252 deletions.
214 changes: 198 additions & 16 deletions docs/Testing.fsx
Expand Up @@ -12,18 +12,24 @@ categoryindex: 0
(*** condition: prepare ***)
#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 2.0.0-preview.16"
#r "nuget: Plotly.NET, 3.0.1"
#r "nuget: FSharpAux, 1.1.0"
#r "nuget: Deedle, 3.0.0"

(*** condition: ipynb ***)
#if IPYNB
#r "nuget: Plotly.NET, 2.0.0-preview.16"
#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.16"
#r "nuget: Plotly.NET, 3.0.1"
#r "nuget: Plotly.NET.Interactive, 3.0.2"
#r "nuget: FSharp.Stats"
#r "nuget: FSharpAux, 1.1.0"
#r "nuget: Deedle, 3.0.0"
#endif // IPYNB

open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects
open FSharpAux
open Deedle

//some axis styling
module Chart =
Expand All @@ -34,6 +40,8 @@ module Chart =
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.withXAxis (myAxis x)
|> Chart.withYAxis (myAxis y)



(**
Expand All @@ -49,7 +57,7 @@ _Summary:_ this tutorial explains how to perform various statistical tests with
- [T-Test](#T-Test)
- [Anova](#Anova)
- [F-Test](#F-Test)
- [H Test](#H-Test)
- [H-Test](#H-Test)
- [Friedman-Test](#Friedman-Test)
- [Wilcoxon signed-rank Test](#Wilcoxon-Test)
- [Chi-Squared Test](#Chi-Squared-Test)
Expand All @@ -63,6 +71,7 @@ _Summary:_ this tutorial explains how to perform various statistical tests with
- [Multiple testing](#Multiple-testing)
- [Benjamini-Hochberg](#Benjamini-Hochberg)
- [Q Value](#Q-Value)
- [SAM](#SAM)
FSharp.Stats provides hypothesis tests for different applications.
A hypothesis test is a statistical test that is used to determine whether there is enough evidence
Expand Down Expand Up @@ -368,16 +377,16 @@ let fTestFromParameters = FTest.testVariancesFromVarAndDof sampleF1 sampleF2
(**
Using a significance level of 0.05 the sample variances do differ significantly.
### H Test
### H-Test
The H test is also known as Kruskal-Wallis one-way analysis-of-variance-by-ranks and is the non-parametric equivalent of one-way ANOVA.
The H-test is also known as Kruskal-Wallis one-way analysis-of-variance-by-ranks and is the non-parametric equivalent of one-way ANOVA.
It is a non-parametric test for comparing the means of more than two independent samples (equal or different sample size), and therefore is an extension of Wilcoxon-Mann-Whitney two sample test.
Testing with H test gives information whether the samples are from the same distribution.
A benefit of the H test is that it does not require normal distribution of the samples.
A benefit of the H-test is that it does not require normal distribution of the samples.
The downside is that there is no information which samples are different from each other, or how many differences occur. For further investigation a post hoc test is recommended.
The distribution of the H test statistic is approximated by chi-square distribution with degrees of freedom = sample count - 1.
The distribution of the H-test statistic is approximated by chi-square distribution with degrees of freedom = sample count - 1.
The implemented H-test is testing for duplicate values in the data.
Duplicates lead to ties in the ranking, and are corrected by using a correction factor.
Expand All @@ -403,15 +412,15 @@ let groupB = [70.;77.;48.;64.;71.;75.]
let groupC = [80.;76.;34.;80.;73.;80.]
let samples = [groupA;groupB;groupC]

// calculation of the H test
// calculation of the H-test
let hResult = HTest.createHTest samples

(*** include-value:hResult ***)

(**
_PValueRight is significant at a alpha level of 0.05_
A suitable post hoc test for H tests is Dunn's test.
A suitable post hoc test for H-tests is Dunn's test.
*)

(**
Expand Down Expand Up @@ -458,6 +467,7 @@ ID | pre | month 1| month 2| month 3| month 4
</pre>
Ranking the results - average if values appear multiple times in one ID
Expand Down Expand Up @@ -824,8 +834,8 @@ let aErrorAcc =
[1. .. 100.]
|> List.map (fun x -> x,(1. - 0.95**x))
|> Chart.Line
|> Chart.withAxisTitles "number of tests (k)" "probability of at least one false positive test"

|> Chart.withXAxisStyle ( "number of tests (k)" )
|> Chart.withYAxisStyle ( "probability of at least one false positive test")
(*** condition: ipynb ***)
#if IPYNB
aErrorAcc
Expand Down Expand Up @@ -865,7 +875,8 @@ let bhValues =
Chart.Line(pValsAdj,Name="adj")
]
|> Chart.combine
|> Chart.withAxisTitles "pValue" "BH corrected pValue"
|> Chart.withXAxisStyle "pValue"
|> Chart.withYAxisStyle "BH corrected pValue"

(*** condition: ipynb ***)
#if IPYNB
Expand Down Expand Up @@ -929,8 +940,8 @@ let qHisto =
Chart.Line([(0.,pi0);(1.,pi0)],Name="pi<sub>0</sub>",LineDash=StyleParam.DrawingStyle.Dash)
]
|> Chart.combine
|> Chart.withAxisTitles "p value" "density"

|> Chart.withXAxisStyle "p value"
|> Chart.withYAxisStyle"density"
(*** condition: ipynb ***)
#if IPYNB
qChart
Expand All @@ -947,4 +958,175 @@ qHisto

(***hide***)
qHisto |> GenericChart.toChartHTML
(***include-it-raw***)
(***include-it-raw***)


(**
###SAM
SAM (Significance Analysis of Microarrays) is a method developed to overcome multiple testing problems, for example in microarray experiments.
SAM, short for Significance Analysis of Microarrays, is a statistical analysis created, but not restricted for, microarray analysis.
It serves as a blue print for any permutation test.
Therefore, high throughput experiments can be analysed using a combined permutation-bootstrap-method. For more in depth information see the [SAM BlogPost](https://csbiology.github.io/CSBlog/posts/7_SAM.html).
*)

(**
Data:
To use SAM, data need to be in the format (string*float[])[]), with string being the name and float array being the replicates. One way of achieving this is the following data preparation:
<center><img style="max-width:40%" src="../img/DataStructureSAM.jpeg"></img></center>
*)
(**
Columns are samples, here 1 and 2, representing control and factor. Rows are transcript counts (here indicated with gene identifier).
Rows are indicated by the sample name, here 1 and 2, representing control and factor. Columns are indicated with the rowkey(A1), here the gene id. This can be saved as .txt.
The next step is to read in the data, e.g. via deedle, and to create a dataframe. The rows are indexed by the sample name and the rowkeys are extracted.
*)

let df:Frame<string,string> =
Frame.ReadCsv(@"data/TestDataSAM.txt",hasHeaders=true,separators = "\t")
// here, the name of A1 is needed.
|> Frame.indexRows "gene"

// get Rowkeys as Array
let rowheader :string[] = df.RowKeys |> Seq.toArray

// to separate control and factor sets (sample1 and sample2, respectively),
// the datasets are chunked by the number of replicates (here: triplicates -> chunkBySize 3).
let (preData1,preData2) :float[][] * float [][]=
df
|> Frame.getRows
|> Series.values
|> Seq.map (Series.values >> Seq.toArray >> Array.chunkBySize 3 >> fun x -> x.[0],x.[1])
|> Array.ofSeq
|> Array.unzip

// After chunking, the datasets are separated by factor and the rowkey is added to the data for later identification.
let data1 = Array.zip rowheader preData1
let data2 = Array.zip rowheader preData2
(**
Optional: Data can be normalised by median centering using the following function:
*)
open SAM
let corrected1 =
let medCorrect =
medianCentering data1
Array.zip rowheader medCorrect
let corrected2 =
let medCorrect =
medianCentering data2
Array.zip rowheader medCorrect

(**
This calculates the median of a column and subtracts it from each value in this column.
The median centering is performed for each column (all samples) separately.
With data1 and data2 , or corrected1 and corrected2, SAM can be performed.
*)
let res = FSharp.Stats.Testing.SAM.twoClassUnpaired 100 0.05 data1 data2 (System.Random(1234))
(**
The parameters are number of permutations, desired FDR, the datasets and a seed. For balanced permutations as in SAM, it seems to be sufficient to use ~ 100 permutations (also default in samR). The seed is used for randomization of the permutations (System.Random()), or can be fixed to achieve the same results multiple times (e.g. System.Random(1234)).
Result contains now the following information: s0, pi0, delta, upper Cut, lower Cut, positive significant bioitems, negative significant bioitems, non significant bioitems, False Discovery Rate (FDR), and median False Positives.
The following code will help to achieve the typical SAM plot.
*)

let SAMChart =

let observed = [| res.NegSigBioitem; res.NonSigBioitem; res.PosSigBioitem|] |> Array.concat
let obs = observed |> Array.map (fun x -> x.Statistics)
let expected = res.AveragePermutations |> Array.map (fun x -> x.Statistics)
let minDi = Seq.min obs
let maxDi = Seq.max obs


// positive significant changes
let posExpected = expected.[res.NegSigBioitem.Length + res.NonSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length + res.PosSigBioitem.Length-1]
let posChart =
Chart.Point(posExpected,res.PosSigBioitem |> Array.map (fun x -> x.Statistics))
|> Chart.withLineStyle(Color=Color.fromKeyword Green)
|> Chart.withTraceInfo("positive change",Visible = Visible.True )


// no significant changes
let nonex = expected.[res.NegSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length-1]
let nonchart =
Chart.Point(nonex,res.NonSigBioitem |> Array.map (fun x -> x.Statistics))
|> Chart.withLineStyle(Color=Color.fromKeyword Gray)
|> Chart.withTraceInfo("no change",Visible = Visible.True)

// negative significant changes
let negex = expected.[0 .. res.NegSigBioitem.Length-1]
let negchart =
Chart.Point(negex,res.NegSigBioitem |> Array.map (fun x -> x.Statistics))
|> Chart.withLineStyle(Color=Color.fromKeyword Red)
|> Chart.withTraceInfo("negative change",Visible = Visible.True)

let samValues =
[
negchart
nonchart
posChart
]
|> Chart.combine

let chartConfig =
let svdConfig =
ConfigObjects.ToImageButtonOptions.init(
Format = StyleParam.ImageFormat.SVG)
Config.init (
ToImageButtonOptions = svdConfig,
ModeBarButtonsToAdd=[ModeBarButton.HoverCompareCartesian]

)

let cutLineUp = [(minDi + res.Delta) ; (maxDi + res.Delta)]
let cutsUp =
Chart.Line(cutLineUp,[minDi;maxDi])
|> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
|> Chart.withTraceInfo("delta",Visible = Visible.True)
let cutLineLow = [(minDi - res.Delta) ; (maxDi - res.Delta)]
let cutsLow =
Chart.Line(cutLineLow,[minDi;maxDi])
|> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
|> Chart.withTraceInfo("delta",Visible = Visible.True)
let linechart =
Chart.Line([minDi;maxDi], [minDi;maxDi])
|> Chart.withTraceInfo("bisecting angle",Visible = Visible.True)
|> Chart.withLineStyle(Color=Color.fromKeyword Black, Width = 1)

let uppercut =
let xAnchorUppercut = [minDi .. 5. .. maxDi]
Chart.Line (xAnchorUppercut, List.init xAnchorUppercut.Length (fun x -> res.UpperCut))

|> Chart.withTraceInfo("upper cut",Visible = Visible.True)
|> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash, Width = 0.3)

let lowercut =
Chart.Line([minDi;maxDi],[res.LowerCut;res.LowerCut])
|> Chart.withTraceInfo("lower cut",Visible = Visible.True)
|> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash,Width = 0.3)
//|> Chart.withXAxisStyle(MinMax = (-15,20))

|> Chart.withTraceInfo("lower cut",Visible = Visible.True)
let plot =
[linechart;
samValues;
cutsUp;
cutsLow;
uppercut;
lowercut]
|> Chart.combine
|> Chart.withTitle(title = "SAM results")
|> Chart.withXAxisStyle("expected Score")
|> Chart.withYAxisStyle ("observed Score")
|> Chart.withConfig(chartConfig)
|> Chart.withTemplate(ChartTemplates.lightMirrored)
plot




(*** condition: ipynb ***)
#if IPYNB
SAMChart
#endif // IPYNB

(***hide***)
System.IO.File.ReadAllText (__SOURCE_DIRECTORY__ + "/img/SAM005.html")
(*** include-it-raw ***)
Binary file added docs/img/DataStructureSAM.jpeg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit f73b05f

Please sign in to comment.