#PCAdapt for Outlier Identification
library(pcadapt)
library(ggman)

#load in pcadapt
NSCAND <- read.pcadapt("all_no_outgroups.recode.vcf", type = "vcf")

#PCA
PCs <- pcadapt(NSCAND, ploidy=1, K = 20, method="mahalanobis", LD.clumping = list(size = 200, thr = 0.1), min.maf = 0.025)
#Check PC contribution
plot(PCs, option = "screeplot")

#Plot PC clusters
poplist.names <- c("5","6","5","5","3","3","3","6","5","5","3","3","5","5","5","5","5","5","5","6","4","6","6","6","5","3","5","3","4","6","6","5","6","5","5","5","6","4","6","1","5","6","6","6","6","6","5","6","5","5","3","4","4","4","4","4","4","4","4","4","6","6","6","6","6","6","6","4","4","4","4","4","2","2","2","4","4","4","4","4","6","5","6","4","5","6","3","4","5","3","6","6","6","6","6","6","6","6","1","4","6","4","4","4","6","4","4","4","4","4","4","5","5","5","4","4","4","5","5","5","6","6","6","6","6","6","6","6","6","6","6","6","6","4","4","4","4","4","4","4","4","5","4","4","4","4","4","5","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","4","6","2","5","5","2","4","4","4","4","4","4","4","6","6","6","6","6","6","4","6","4","4","4","4","4","6","4","4","4","4","4","4","4","4","4","4","5","4","4","4","6","6","5","4","6","6","6","6","6","6","6","6","6","6","6","6","6","6","6","4","6","5","5","6","6","6","6","5","5","6","6","6","6","6","6","2","2","6","5","5","2","6","6","6","6","6","6","2","6","6","6","6","4","6","6","6","6","6","6","6","5","5","3","5","6","5","3","6","5","5","5","5","5","5","6","4","6","5","5","6","6","4","2","2","4","4","4","3","3","5","5","1","6","1","6","3","3","3","3","3","3","4","4","4","5","4","2","2","3","5","5","5","6","6","6","5","5","5","5","6","6","6","6","6","4","6","6","1","4","6","6","5","5","6","6","6","6","6","4","6","5","5","6","6","6","6","5","4","4","5","5","5")
plot(PCs,option="scores", pop=poplist.names)
plot(PCs, option = "scores", i = 2, j = 3, pop = poplist.names)
plot(PCs, option = "scores", i = 3, j = 4, pop = poplist.names)
plot(PCs, option = "scores", i = 4, j = 5, pop = poplist.names)
plot(PCs, option = "scores", i = 6, j = 7, pop = poplist.names)
plot(PCs, option = "scores", i = 8, j = 9, pop = poplist.names)
plot(PCs, option = "scores", i = 9, j = 10, pop = poplist.names)

#redo PCs based on screeplot and PC clusters (in this case K=2)
PCs <- pcadapt(NSCAND, ploidy=1, K = 2, method="mahalanobis", LD.clumping = list(size = 200, thr = 0.1), min.maf = 0.025)

#check loadings for even distribution
par(mfrow = c(2, 2))
for (i in 1:4)
  plot(PCs$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i))

# Manhattan Plot
plot(PCs , option = "manhattan")

#Q-Q Plot
plot(PCs, option = "qqplot")

#hist
hist(PCs$pvalues, xlab = "p-values", main = NULL, breaks = 50, col = "orange")

#stats_dist
plot(PCs, option = "stat.distribution")

#make a plotting file
MAPPOS <- read.delim2("delim2.txt")
PVALS <- PCs$pvalues
PCMAP <- as.data.frame(cbind(LG = MAPPOS$chrom, pos=MAPPOS$chromrank, pval=PVALS, SNPids=MAPPOS$SNP))
padj <- p.adjust(PCs$pvalues,method="bonferroni")
alpha <- 0.1
outliers <- which(padj < alpha)
OutliersNSCAND <- PCMAP[outliers,]
outliers
write.csv(OutliersNSCAND, file = "OutliersNSCAND.csv")


# Manhattan Plot
plot(PCs , option = "manhattan")

ggman(PCMAP, snp = "SNPids", bp = "pos", chrom = "LG", pvalue = "pval", pointSize = 2)