Skip to content

Commit

Permalink
update mlg vignette to fix #177
Browse files Browse the repository at this point in the history
  • Loading branch information
zkamvar committed May 5, 2018
1 parent 08c3134 commit 98f9708
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 156 deletions.
124 changes: 47 additions & 77 deletions vignettes/mlg.Rmd
Expand Up @@ -386,89 +386,59 @@ the biology of your organism.

To help visualize this, there is the function `filter_stats()`, which will plot
the output of the filtering algorithm. For simplicity, we will use the `Pinf`
data set.
data set representing *Phytophthora infestans* samples from Mexico and South America `r citep(bib["goss2009population"])`.

```{r}
```{r plot_replen}
data(Pinf)
Pinf
```
```{r, eval = FALSE}
pinfreps <- fix_replen(Pinf, c(2, 2, 6, 2, 2, 2, 2, 2, 3, 3, 2))
pinf_filtered <- filter_stats(Pinf, distance = bruvo.dist, replen = pinfreps, plot = TRUE)
```
```{r, echo = FALSE}
pinfreps <- fix_replen(Pinf, c(2, 2, 6, 2, 2, 2, 2, 2, 3, 3, 2))
pinf_dist <- bruvo.dist(Pinf, replen = pinfreps)
pinf_filtered <- structure(list(farthest = structure(list(THRESHOLDS = c(0.0126262626262626,
0.0218986742424242, 0.0227272727272727, 0.0227272727272727, 0.0353535353535354,
0.0416666666666667, 0.0426136363636364, 0.0454545440998944, 0.0469933712121212,
0.0475852031900425, 0.0568181818181818, 0.0568181818181818, 0.0583570075757576,
0.0645123106060606, 0.0653409090909091, 0.0681818181818182, 0.0787168560606061,
0.0795454545454545, 0.0877274792603772, 0.0887784090909091, 0.09375,
0.0946969696969697, 0.0972222218459303, 0.101444128787879, 0.125,
0.130997474747475, 0.1359375, 0.13740234375, 0.144886363636364,
0.149999997019768, 0.156565656565657, 0.166883656472871, 0.18115234375,
0.193170719526031, 0.210227261890065, 0.215909090909091, 0.21874982660467,
0.22561553030303, 0.23115234375, 0.232954502105713, 0.234374994039536,
0.23532196969697, 0.24147722937844, 0.258500315926292, 0.272017021070827,
0.274999999592546, 0.283380660143766, 0.29208984375, 0.299999999999997,
0.300423362038352, 0.303269896844421, 0.305499606455366, 0.306322496341461,
0.3132568359375, 0.336390339246273, 0.337091619318182, 0.340897971256213,
0.351708152074769, 0.356196582317352, 0.360792333090847, 0.36487923968922,
0.396464635627438, 0.402803457144535, 0.403320301662627, 0.423251064663584,
0.437319652600722, 0.454361644319513, 0.470472120576435, 0.508276028103299,
0.514966882581328, 0.574741401973021)), .Names = "THRESHOLDS"),
average = structure(list(THRESHOLDS = c(0.0126262626262626,
0.0218986742424242, 0.0227272727272727, 0.0227272727272727,
0.0258838383838384, 0.0416666666666667, 0.0426136363636364,
0.0454545440998944, 0.0455137310606061, 0.0466678503787879,
0.0475852031900425, 0.0511659564393939, 0.0545526883417508,
0.0568181818181818, 0.0568181818181818, 0.0653409090909091,
0.0681818181818182, 0.0746970385413379, 0.0776515151515152,
0.0795454545454545, 0.0838660037878788, 0.0866477272727273,
0.0915404036641121, 0.09375, 0.0990451334702848, 0.117836346931329,
0.125, 0.1359375, 0.13740234375, 0.144886363636364, 0.149999997019768,
0.166883656472871, 0.169220816635022, 0.18115234375, 0.182812413302335,
0.189831002331002, 0.193170719526031, 0.206485256010836,
0.210227261890065, 0.210983060586332, 0.215909090909091,
0.222481511544012, 0.226043217397815, 0.231752027784559,
0.232947945592142, 0.234374967488376, 0.239980004751746,
0.256210318427872, 0.256249999592546, 0.256803937821849,
0.265303876476758, 0.270312499999997, 0.274878910364527,
0.284377216447246, 0.287522453250307, 0.288763944769339,
0.289024162970308, 0.289442437948814, 0.303269896844421,
0.30955901687567, 0.320306661564573, 0.32450982493207, 0.324653779309478,
0.326784648203577, 0.337955069560145, 0.343659817474595,
0.353714804095389, 0.358177516571547, 0.360592245250208,
0.387454092730527, 0.412903107125552)), .Names = "THRESHOLDS"),
nearest = structure(list(THRESHOLDS = c(0.0126262626262626,
0.0218986742424242, 0.0227272727272727, 0.0227272727272727,
0.0227272727272727, 0.0340909090909091, 0.0397727272727273,
0.0416666666666667, 0.0426136363636364, 0.0440340909090909,
0.0440340909090909, 0.0454545440998944, 0.0475852031900425,
0.0568181818181818, 0.0568181818181818, 0.0568181818181818,
0.0653409090909091, 0.0681818181818182, 0.0681818181818182,
0.0681818181818182, 0.071969696969697, 0.0795454545454545,
0.0823863636363636, 0.0858585854822939, 0.0883838383838384,
0.09375, 0.1248046875, 0.125, 0.134706415311255, 0.1359375,
0.13740234375, 0.144886363636364, 0.146875, 0.149999997019768,
0.151909718910853, 0.15625, 0.15625, 0.159326171875, 0.161024305141634,
0.166883656472871, 0.168797348484848, 0.172743055555556,
0.173295454545455, 0.177680120286014, 0.18115234375, 0.181818168271672,
0.181818180463531, 0.1841796875, 0.184374998509884, 0.186618041992188,
0.190625, 0.193170719526031, 0.193749997019768, 0.20625,
0.210227261890065, 0.21240234375, 0.21484375, 0.215277776949935,
0.215902837837348, 0.218747171488675, 0.223487314551768,
0.224902342259884, 0.224902342632413, 0.240624999999997,
0.241050253876231, 0.241137550292405, 0.246679684519768,
0.249129393034511, 0.259232954545455, 0.278124998509884,
0.287377917766571)), .Names = "THRESHOLDS")), .Names = c("farthest",
"average", "nearest"))
plot_filter_stats(Pinf, pinf_filtered, pinf_dist, breaks = "Scott")
```

We can see that the different algorithms behave quite differently.

We can see that the different algorithms behave similarly for small thresholds,
but begin to differ over larger thresholds.

This function is useful for finding all thresholds at which MLLs are collapsed,
which can help with choosing a threshold that collapses putative clones in our
sample into lineages.

### Choosing a threshold

After you have chosen a genetic distance and a filtering algorithm, you must
then decide on the threshold to represent the **minimum genetic distance at
which two individuals would be considered from different clonal lineages**.

One method described in the literature for choosing a threshold is to look for an
initial, small peak in the histogram of pairwise genetic distances and set the
threshold to be between that peak and the larger peak `r citep(bib[c("arnaud2007standardizing", "bailleul2016rclone")])`.
This initial peak likely represents clones differentiated by a small set of
random mutations. You can see this in the figure above at a threshold of ~0.11
for the "farthest neighbor" algorithm.

However, if this peak is not obvious, then another method is to look for the largest gap between all putative thresholds. For this, you can use the `cutoff_predictor()` function with the output of `filter_stats()`. It should be noted, however, that this method is not a perfect solution. If we take the results from above, we can find the threshold for each algorithm:

```{r cutof_predictor}
print(farthest_thresh <- cutoff_predictor(pinf_filtered$farthest$THRESHOLDS))
print(average_thresh <- cutoff_predictor(pinf_filtered$average$THRESHOLDS))
print(nearest_thresh <- cutoff_predictor(pinf_filtered$nearest$THRESHOLDS))
```

Now we can define multilocus lineages for *P. infestans* with the following criteria:

- Genetic Distance: Bruvo's Distance
- Filtering Algorithm: Farthest neighbor
- Threshold: `r signif(farthest_thresh, 3)`

```{r pinf_thresh}
mlg.filter(Pinf, distance = bruvo.dist, replen = pinfreps, algorithm = "f") <- farthest_thresh
Pinf
```

> Note: Please take care to critically evaluate the results and make sure they
> apply to your data. This function WILL give you an answer whether or not one
> truly exists. Additionally, for diploid organisms, another method of choosing a
> threshold is available in the **RClone** package that involves simulating
> outcrossing and inbreeding events `r citep(bib["bailleul2016rclone"])`.
## Custom ("custom")

Expand Down
174 changes: 95 additions & 79 deletions vignettes/the_bibliography.bib
Expand Up @@ -7,6 +7,22 @@ @article{parks1993study
publisher={JSTOR}
}

@article{bailleul2016rclone,
doi = {10.1111/2041-210x.12550},
url = {https://doi.org/10.1111/2041-210x.12550},
year = 2016,
month = {mar},
publisher = {Wiley-Blackwell},
volume = {7},
number = {8},
pages = {966--970},
author = {Diane Bailleul and Solenn Stoeckel and Sophie Arnaud-Haond},
editor = {Timoth{\'{e}}e Poisot},
title = {{RClone}: a package to identify {MultiLocus} Clonal Lineages and handle clonal data sets in {R}.},
journal = {Methods in Ecology and Evolution}
}


@article{arnaud2007standardizing,
title={Standardizing methods to address clonality in population studies},
author={Arnaud-Haond, Sophie and Duarte, Carlos M and Alberto, Filipe and Serr{\~a}o, Ester A},
Expand Down Expand Up @@ -75,16 +91,16 @@ @article{everhart2014finescale
}

@article{goss2014irish,
author = {Goss, Erica M. and Tabima, Javier F. and Cooke, David E. L. and Restrepo, Silvia and Fry, William E. and Forbes, Gregory A. and Fieland, Valerie J. and Cardenas, Martha and Grünwald, Niklaus J.},
author = {Goss, Erica M. and Tabima, Javier F. and Cooke, David E. L. and Restrepo, Silvia and Fry, William E. and Forbes, Gregory A. and Fieland, Valerie J. and Cardenas, Martha and Grünwald, Niklaus J.},
title = {The Irish potato famine pathogen Phytophthora infestans originated in central Mexico rather than the Andes},
year = {2014},
year = {2014},
doi = {10.1073/pnas.1401884111},
volume = {111},
pages={8791--8796},
number={24},
URL = {http://www.pnas.org/content/early/2014/05/29/1401884111.abstract},
eprint = {http://www.pnas.org/content/early/2014/05/29/1401884111.full.pdf+html},
journal = {Proceedings of the National Academy of Sciences}
URL = {http://www.pnas.org/content/early/2014/05/29/1401884111.abstract},
eprint = {http://www.pnas.org/content/early/2014/05/29/1401884111.full.pdf+html},
journal = {Proceedings of the National Academy of Sciences}
}

@article{excoffier1992analysis,
Expand Down Expand Up @@ -176,54 +192,54 @@ @article {Agapow:2001
}

@article{Smith:1993,
author = {Smith, J M and Smith, N H and O'Rourke, M and Spratt, B G},
author = {Smith, J M and Smith, N H and O'Rourke, M and Spratt, B G},
title = {How clonal are bacteria?},
volume = {90},
number = {10},
pages = {4384-4388},
year = {1993},
doi = {10.1073/pnas.90.10.4384},
eprint = {http://www.pnas.org/content/90/10/4384.full.pdf+html},
journal = {Proceedings of the National Academy of Sciences}
volume = {90},
number = {10},
pages = {4384-4388},
year = {1993},
doi = {10.1073/pnas.90.10.4384},
eprint = {http://www.pnas.org/content/90/10/4384.full.pdf+html},
journal = {Proceedings of the National Academy of Sciences}
}

@article{Brown:1980,
author = {Brown,A.H.D. and Feldman,M.W. and Nevo,E.},
author = {Brown,A.H.D. and Feldman,M.W. and Nevo,E.},
title = {MULTILOCUS STRUCTURE OF NATURAL POPULATIONS OF \textit{{Hordeum spontaneum}}},
volume = {96},
volume = {96},
number = {2},
pages = {523--536},
year = {1980},
URL = {http://www.genetics.org/content/96/2/523.abstract},
eprint = {http://www.genetics.org/content/96/2/523.full.pdf+html},
journal = {Genetics}
pages = {523--536},
year = {1980},
URL = {http://www.genetics.org/content/96/2/523.abstract},
eprint = {http://www.genetics.org/content/96/2/523.full.pdf+html},
journal = {Genetics}
}


@article{Jombart:2008,
author = {Jombart, Thibaut},
author = {Jombart, Thibaut},
title = {adegenet: a {R} package for the multivariate analysis of genetic markers},
volume = {24},
number = {11},
pages = {1403-1405},
year = {2008},
doi = {10.1093/bioinformatics/btn129},
URL = {http://bioinformatics.oxfordjournals.org/content/24/11/1403.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/24/11/1403.full.pdf+html},
journal = {Bioinformatics}
volume = {24},
number = {11},
pages = {1403-1405},
year = {2008},
doi = {10.1093/bioinformatics/btn129},
URL = {http://bioinformatics.oxfordjournals.org/content/24/11/1403.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/24/11/1403.full.pdf+html},
journal = {Bioinformatics}
}

@article{Jombart:2011,
author = {Jombart, Thibaut and Ahmed, Ismaïl},
author = {Jombart, Thibaut and Ahmed, Ismaïl},
title = {adegenet 1.3-1: new tools for the analysis of genome-wide {SNP} data},
volume = {27},
number = {21},
pages = {3070-3071},
year = {2011},
doi = {10.1093/bioinformatics/btr521},
URL = {http://bioinformatics.oxfordjournals.org/content/27/21/3070.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/27/21/3070.full.pdf+html},
journal = {Bioinformatics}
volume = {27},
number = {21},
pages = {3070-3071},
year = {2011},
doi = {10.1093/bioinformatics/btr521},
URL = {http://bioinformatics.oxfordjournals.org/content/27/21/3070.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/27/21/3070.full.pdf+html},
journal = {Bioinformatics}
}

@article{Grunwald:2006,
Expand All @@ -235,7 +251,7 @@ @article{Grunwald:2006
pages={1134-1141},
year={2006}
}

@Article{Grunwald:2003,
AUTHOR = {Grünwald, Niklaus J. and Goodwin, Stephen B. and Milgroom, Michael G. and Fry, William E.},
TITLE = {Analysis of genotypic diversity data for populations of microorganisms.},
Expand Down Expand Up @@ -263,13 +279,13 @@ @article{Heck:1975
copyright = {Copyright © 1975 Ecological Society of America},
}

@article{Hurlbert:1971,
@article{Hurlbert:1971,
title={The nonconcept of species diversity: a critique and alternative parameters}, volume={52},
number={4},
journal={Ecology},
publisher={Eco Soc America},
author={Hurlbert, S H},
year={1971},
number={4},
journal={Ecology},
publisher={Eco Soc America},
author={Hurlbert, S H},
year={1971},
pages={577--586}
}

Expand Down Expand Up @@ -309,27 +325,27 @@ @article {Bruvo:2004
}

@article{Stoddart:1988,
author = {Stoddart, J.A. and Taylor, J.F.},
author = {Stoddart, J.A. and Taylor, J.F.},
title = {Genotypic diversity: estimation and prediction in samples.},
volume = {118},
number = {4},
pages = {705-11},
year = {1988},
URL = {http://www.genetics.org/content/118/4/705.abstract},
eprint = {http://www.genetics.org/content/118/4/705.full.pdf+html},
journal = {Genetics}
volume = {118},
number = {4},
pages = {705-11},
year = {1988},
URL = {http://www.genetics.org/content/118/4/705.abstract},
eprint = {http://www.genetics.org/content/118/4/705.full.pdf+html},
journal = {Genetics}
}

@article{Nei:1978,
author = {Nei, Masatoshi},
author = {Nei, Masatoshi},
title = {ESTIMATION OF AVERAGE HETEROZYGOSITY AND GENETIC DISTANCE FROM A SMALL NUMBER OF INDIVIDUALS},
volume = {89},
number = {3},
pages = {583-590},
year = {1978},
URL = {http://www.genetics.org/content/89/3/583.abstract},
eprint = {http://www.genetics.org/content/89/3/583.full.pdf+html},
journal = {Genetics}
volume = {89},
number = {3},
pages = {583-590},
year = {1978},
URL = {http://www.genetics.org/content/89/3/583.abstract},
eprint = {http://www.genetics.org/content/89/3/583.full.pdf+html},
journal = {Genetics}
}

@article{Peakall:2006,
Expand All @@ -343,16 +359,16 @@ @article{Peakall:2006
}

@article{Peakall:2012,
author = {Peakall, Rod and Smouse, Peter E.},
author = {Peakall, Rod and Smouse, Peter E.},
title = {{GenAlEx} 6.5: genetic analysis in Excel. Population genetic software for teaching and research—an update},
volume = {28},
number = {19},
pages = {2537-2539},
year = {2012},
doi = {10.1093/bioinformatics/bts460},
URL = {http://bioinformatics.oxfordjournals.org/content/28/19/2537.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/28/19/2537.full.pdf+html},
journal = {Bioinformatics}
volume = {28},
number = {19},
pages = {2537-2539},
year = {2012},
doi = {10.1093/bioinformatics/bts460},
URL = {http://bioinformatics.oxfordjournals.org/content/28/19/2537.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/28/19/2537.full.pdf+html},
journal = {Bioinformatics}
}

@Manual{vegan,
Expand All @@ -364,16 +380,16 @@ @Manual{vegan
}

@article{Haubold:2000,
author = {Haubold, Bernhard and Hudson, Richard R.},
author = {Haubold, Bernhard and Hudson, Richard R.},
title = {{LIAN} 3.0: detecting linkage disequilibrium in multilocus data},
volume = {16},
number = {9},
pages = {847-849},
year = {2000},
doi = {10.1093/bioinformatics/16.9.847},
URL = {http://bioinformatics.oxfordjournals.org/content/16/9/847.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/16/9/847.full.pdf+html},
journal = {Bioinformatics}
volume = {16},
number = {9},
pages = {847-849},
year = {2000},
doi = {10.1093/bioinformatics/16.9.847},
URL = {http://bioinformatics.oxfordjournals.org/content/16/9/847.abstract},
eprint = {http://bioinformatics.oxfordjournals.org/content/16/9/847.full.pdf+html},
journal = {Bioinformatics}
}

@book{Pielou:1975,
Expand Down

0 comments on commit 98f9708

Please sign in to comment.