Skip to content

Commit

Permalink
Update gen2dist to deal with multiallelic loci.
Browse files Browse the repository at this point in the history
  • Loading branch information
TomJamesW committed Jun 16, 2020
1 parent 810c7eb commit abdef0a
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 9 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Imports:
methods,
reshape2,
stats,
stringdist,
utils
Suggests:
ape,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ exportMethods(metadiv)
exportMethods(similarity)
exportMethods(subdiv)
import(methods)
import(stringdist)
importFrom(binaryLogic,as.binary)
importFrom(reshape2,melt)
importFrom(stats,na.omit)
Expand Down
39 changes: 31 additions & 8 deletions R/gen2dist.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
#' Converts a vcfR object to a matrix of pairwise genetic distances.
#'
#' @param vcf object of class \code{vcfR}.
#' @param biallelic logical describing whether the data is biallelic or not (default).
#'
#' @return \code{gen2dist(x)} returns an object of class \code{distance}
#' containing a \code{matrix} of pairwise genetic distances.
#' @export
#'
gen2dist <- function(vcf) {
gen2dist <- function(vcf, biallelic = FALSE) {

if("vcfR" %in% rownames(utils::installed.packages()) == FALSE){
stop("gen2dist() requires the package vcfR")}
Expand All @@ -17,15 +18,37 @@ gen2dist <- function(vcf) {
}
#extract genotype information
gendat <- vcfR::extract.gt(vcf)
#swap slashes for pipes
gendat <- sub('/','|',gendat)
#recode missing data as no mutation
gendat[is.na(gendat)] <- '0|0'
#recode strings as digits
gendat[gendat == "0|0"] <- 0
gendat[gendat == "1|0"] <- 1
gendat[gendat == "0|1"] <- 1
gendat[gendat == "1|1"] <- 2
#calculate distance matrix
dist <- as.matrix(stats::dist(t(gendat), method = 'manhattan'))
if (biallelic == TRUE){
#recode strings as digits
gendat[gendat == "0|0"] <- 0
gendat[gendat == "1|0"] <- 1
gendat[gendat == "0|1"] <- 1
gendat[gendat == "1|1"] <- 2
#calculate distance matrix
dist <- as.matrix(stats::dist(t(gendat), method = 'manhattan'))}
if (biallelic == FALSE){
#recode strings so as highest gt is first
gendat[gendat == "0|1"] <- "1|0"
gendat[gendat == "0|2"] <- "2|0"
gendat[gendat == "0|3"] <- "3|0"
gendat[gendat == "0|4"] <- "4|0"
gendat[gendat == "1|2"] <- "2|1"
gendat[gendat == "1|3"] <- "3|1"
gendat[gendat == "1|4"] <- "4|1"
gendat[gendat == "2|3"] <- "3|2"
gendat[gendat == "2|4"] <- "4|2"
gendat[gendat == "3|4"] <- "4|3"
#remove rownames
rownames(gendat) <- c()
#change matrix to list of strings
genlist <- lapply(seq_len(ncol(gendat)), function(i) gendat[,i])
#calculate distance matrix
dist <- as.matrix(stringdist::stringdistmatrix(as.character(genlist), method = 'hamming'))
}
#return distance object
return(new("distance",
distance = dist,
Expand Down
1 change: 1 addition & 0 deletions R/rdiversity-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@
#' @importFrom reshape2 melt
#' @importFrom stats na.omit
#' @importFrom utils head installed.packages
#' @import stringdist
#'
NULL
4 changes: 3 additions & 1 deletion man/gen2dist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit abdef0a

Please sign in to comment.