#PCAdapt for Outlier Identification library(pcadapt) library(ggman) #load in pcadapt NSCAND <- read.pcadapt("all_no_outgroups.recode.vcf", type = "vcf") #PCA PCs <- pcadapt(NSCAND, ploidy=1, K = 20, method="mahalanobis", LD.clumping = list(size = 200, thr = 0.1), min.maf = 0.025) #Check PC contribution plot(PCs, option = "screeplot") #Plot PC clusters poplist.names <- c("5","6","5","5","3","3","3","6","5","5","3","3","5","5","5","5","5","5","5","6","4","6","6","6","5","3","5","3","4","6","6","5","6","5","5","5","6","4","6","1","5","6","6","6","6","6","5","6","5","5","3","4","4","4","4","4","4","4","4","4","6","6","6","6","6","6","6","4","4","4","4","4","2","2","2","4","4","4","4","4","6","5","6","4","5","6","3","4","5","3","6","6","6","6","6","6","6","6","1","4","6","4","4","4","6","4","4","4","4","4","4","5","5","5","4","4","4","5","5","5","6","6","6","6","6","6","6","6","6","6","6","6","6","4","4","4","4","4","4","4","4","5","4","4","4","4","4","5","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","6","2","5","5","2","4","4","4","4","4","4","4","6","6","6","6","6","6","4","6","4","4","4","4","4","6","4","4","4","4","4","4","4","4","4","4","5","4","4","4","6","6","5","4","6","6","6","6","6","6","6","6","6","6","6","6","6","6","6","4","6","5","5","6","6","6","6","5","5","6","6","6","6","6","6","2","2","6","5","5","2","6","6","6","6","6","6","2","6","6","6","6","4","6","6","6","6","6","6","6","5","5","3","5","6","5","3","6","5","5","5","5","5","5","6","4","6","5","5","6","6","4","2","2","4","4","4","3","3","5","5","1","6","1","6","3","3","3","3","3","3","4","4","4","5","4","2","2","3","5","5","5","6","6","6","5","5","5","5","6","6","6","6","6","4","6","6","1","4","6","6","5","5","6","6","6","6","6","4","6","5","5","6","6","6","6","5","4","4","5","5","5") plot(PCs,option="scores", pop=poplist.names) plot(PCs, option = "scores", i = 2, j = 3, pop = poplist.names) plot(PCs, option = "scores", i = 3, j = 4, pop = poplist.names) plot(PCs, option = "scores", i = 4, j = 5, pop = poplist.names) plot(PCs, option = "scores", i = 6, j = 7, pop = poplist.names) plot(PCs, option = "scores", i = 8, j = 9, pop = poplist.names) plot(PCs, option = "scores", i = 9, j = 10, pop = poplist.names) #redo PCs based on screeplot and PC clusters (in this case K=2) PCs <- pcadapt(NSCAND, ploidy=1, K = 2, method="mahalanobis", LD.clumping = list(size = 200, thr = 0.1), min.maf = 0.025) #check loadings for even distribution par(mfrow = c(2, 2)) for (i in 1:4) plot(PCs$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i)) # Manhattan Plot plot(PCs , option = "manhattan") #Q-Q Plot plot(PCs, option = "qqplot") #hist hist(PCs$pvalues, xlab = "p-values", main = NULL, breaks = 50, col = "orange") #stats_dist plot(PCs, option = "stat.distribution") #make a plotting file MAPPOS <- read.delim2("delim2.txt") PVALS <- PCs$pvalues PCMAP <- as.data.frame(cbind(LG = MAPPOS$chrom, pos=MAPPOS$chromrank, pval=PVALS, SNPids=MAPPOS$SNP)) padj <- p.adjust(PCs$pvalues,method="bonferroni") alpha <- 0.1 outliers <- which(padj < alpha) OutliersNSCAND <- PCMAP[outliers,] outliers write.csv(OutliersNSCAND, file = "OutliersNSCAND.csv") # Manhattan Plot plot(PCs , option = "manhattan") ggman(PCMAP, snp = "SNPids", bp = "pos", chrom = "LG", pvalue = "pval", pointSize = 2)