-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit af77bc5
Showing
62 changed files
with
5,233 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
Package: gwid | ||
Type: Package | ||
Title: Genome-Wide Identity-by-Descent | ||
Version: 0.1.0 | ||
Authors@R: c( | ||
person("Soroush", "Mahmoudiandehkordi", email = "soroushmdg@gmail.com", role = c("aut", "cre")), | ||
person("Mehdi", "Maadooliat", email = "mehdi.maadooliat@mu.edu", role = "aut"), | ||
person("Steven", "Schrodi", email = "schrodi@wisc.edu", role = "aut") | ||
) | ||
Maintainer: Soroush Mahmoudiandehkordi <soroushmdg@gmail.com> | ||
Description: Methods and tools for the analysis of Genome Wide | ||
Identity-by-Descent ('gwid') mapping data, focusing on testing whether there | ||
is a higher occurrence of Identity-By-Descent (IBD) segments around potential causal variants | ||
in cases compared to controls, which is crucial for identifying rare | ||
variants. To enhance its analytical power, 'gwid' incorporates a Sliding | ||
Window Approach, allowing for the detection and analysis of signals from | ||
multiple Single Nucleotide Polymorphisms (SNPs). | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
Imports: data.table, gdsfmt, SNPRelate, Matrix, ggplot2, plotly, utils, | ||
stats, RcppRoll, methods, piggyback | ||
RoxygenNote: 7.2.3 | ||
Suggests: knitr, magrittr, rmarkdown, testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 | ||
URL: https://github.com/soroushmdg/gwid | ||
BugReports: https://github.com/soroushmdg/gwid/issues | ||
NeedsCompilation: no | ||
Packaged: 2023-12-14 14:00:32 UTC; soroush | ||
Author: Soroush Mahmoudiandehkordi [aut, cre], | ||
Mehdi Maadooliat [aut], | ||
Steven Schrodi [aut] | ||
Repository: CRAN | ||
Date/Publication: 2023-12-14 16:40:08 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: gwid authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
2339c05f9e3c435f07321a4e762e8b52 *DESCRIPTION | ||
a4f8c4ebcedf71886abfb0f38d00582f *LICENSE | ||
7b30899cf199107d3f1fc47561c08705 *NAMESPACE | ||
2c3b2daa370d785e4f6dbde49476479b *NEWS.md | ||
6332b6ecfc4e7cc19c5da8fb2b1341d0 *R/build.R | ||
64c942e25f6d91d81669aafd0061facb *R/case_control.R | ||
740150a99e1b2efd66111d00740f5af6 *R/extract.R | ||
9da2728f4963e16ec0c11d243f9e1816 *R/haplotype_structure.R | ||
db170888f9cc8c1297063a10ceea3fa7 *R/hypothesis_test.R | ||
c60fdb80ce7bf582b26759d7f4f307f2 *R/plot.R | ||
f3b775e7d4b6e1c14ee69b5bf119447f *R/roh.R | ||
7bafd82be04ae30ae5799949be0622f7 *R/utils.R | ||
2240665d0af2cff75a432b3e9bc3a456 *README.md | ||
d4bbb420e24462d9319a33b5531c5c4c *man/build_gwas.Rd | ||
ee5e433c3343b0b232be4c386ca465fd *man/build_gwid.Rd | ||
5dc565005a8dd793ec3b21804d1e428b *man/build_phase.Rd | ||
c73a0350e14bec0bcc607e44b1b4d6bf *man/case_control.Rd | ||
8cd64096bdf69a922f395522623aa7c3 *man/extract.Rd | ||
746795c025f2ef6bc9621b35f57dec8c *man/extract.gwas.Rd | ||
628e46a65fb21880f88e68ee5d25ac28 *man/extract.gwid.Rd | ||
8c86fbe77bf43f0edd9232a55233a69b *man/extract_window.Rd | ||
73fd0f0127b04dab671e23d7be1e3e41 *man/extract_window.gwid.Rd | ||
6c0af988b8bea1fd105fc12cec041a5f *man/figures/README-example-1.png | ||
732e993b474ae19fd0b7ce2ef1d90ef0 *man/figures/README-example-2.png | ||
32cb276b6dcaf4f2afe9e06868a42cfa *man/figures/README-pressure-1.png | ||
195b676278ac65fd468b45b0b52c0f98 *man/figures/README-unnamed-chunk-4-1.png | ||
508a19cb0d595555d3c618c4601ce586 *man/figures/README-unnamed-chunk-4-2.png | ||
802018f087159e0bba40cf7a3f7aa562 *man/figures/README-unnamed-chunk-5-1.png | ||
3f5983f80814997f046474978b4ef880 *man/figures/final-copy-arrow.png | ||
a4b18d3f03e016f5d59a57ac951d62f2 *man/figures/hap_version1.png | ||
80ac1f8a4539bba99891dc76bad27be6 *man/fisher_test.Rd | ||
2170249144341d373f5973a793c09a35 *man/fisher_test.gwas.Rd | ||
1f687f8710bb3a7874dedb77da38f9ff *man/fisher_test.gwid.Rd | ||
0cb0bd11b4219d9b93e9c7d3bf759836 *man/fisher_test.result_snps.Rd | ||
e1a9c01cb8ae9f13ba1b79889aa9eb0e *man/gtest.Rd | ||
48aaa27ffa2d3a1e584d506434ada59f *man/gtest.haplotype_structure.Rd | ||
bf58970bbf0957697a48eccdd128bace *man/haplotype_frequency.Rd | ||
ad7ceda200a69d0a9a4a80bfcab648ac *man/haplotype_frequency.haplotype_structure.Rd | ||
5c3dea5a88221fccc41b522535bac879 *man/haplotype_structure.Rd | ||
6712c553404cc28c0994c1f9d79277b0 *man/haplotype_structure.gwas.Rd | ||
08ae515453217c6ead1ac9fb54934eed *man/haplotype_structure.gwid.Rd | ||
ed4175daae00ef492c3dd2b3b0deee91 *man/mcnemar_test.Rd | ||
d1d21173d35a9462d5348f41dd5117b5 *man/mcnemar_test.result_snps.Rd | ||
3c6d51be7b3122bbbce39b41d73b27f6 *man/mcnemar_test_permut.Rd | ||
7895d66099567bf491a0fa33051d047b *man/mcnemar_test_permut.result_snps.Rd | ||
6111173128a04fc7897e991267425de7 *man/permutation_test.Rd | ||
5d4652c85313b816dbfcc857dbe41e7a *man/permutation_test.gwas.Rd | ||
6c481fb77f8c389ecf2a18577f133828 *man/permutation_test.gwid.Rd | ||
9bb38a92a315d39c6c8be63c5f1f6955 *man/permutation_test.haplotype_structure.Rd | ||
133b060317998c060c07c3c285942670 *man/plot.gwas.Rd | ||
81ab70e931c0c38d2b4e50d406c1722c *man/plot.gwid.Rd | ||
1f4f27db65a8adbee93d4517ed3bfe64 *man/plot.haplotype_frequency.Rd | ||
129fbbfce42dc62d88638aa6dbab9209 *man/plot.haplotype_structure_frequency.Rd | ||
7d1cf05ccfed954b4bfe07c94d4f0dd4 *man/plot.result_snps.Rd | ||
d6c8dfb4a82c4bb88e90acbd49454229 *man/plot.test_snps.Rd | ||
415498f77efa020ce443c0a18f86db30 *man/print.Rd | ||
220c303504c370aaaa55efd2469aea31 *man/print.gwas.Rd | ||
c39de9a46e35312c0bb7c2f19ef2e487 *man/roh.Rd | ||
57ac3a33d8c7c9984cf155bd6a303742 *man/roh.phase.Rd | ||
5ca6eeeded4ff58448c72bb37d092b89 *man/subset.Rd | ||
a688861e104fd57dd6e6d4edaff1f11d *man/subset.gwid.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(extract,gwas) | ||
S3method(extract,gwid) | ||
S3method(extract_window,gwid) | ||
S3method(fisher_test,gwas) | ||
S3method(fisher_test,gwid) | ||
S3method(fisher_test,result_snps) | ||
S3method(gtest,haplotype_structure) | ||
S3method(haplotype_frequency,haplotype_structure) | ||
S3method(haplotype_structure,gwas) | ||
S3method(haplotype_structure,gwid) | ||
S3method(mcnemar_test,result_snps) | ||
S3method(mcnemar_test_permut,result_snps) | ||
S3method(permutation_test,gwas) | ||
S3method(permutation_test,gwid) | ||
S3method(permutation_test,haplotype_structure) | ||
S3method(plot,gwas) | ||
S3method(plot,gwid) | ||
S3method(plot,haplotype_frequency) | ||
S3method(plot,haplotype_structure_frequency) | ||
S3method(plot,result_snps) | ||
S3method(plot,test_snps) | ||
S3method(print,gwas) | ||
S3method(roh,phase) | ||
S3method(subset,gwid) | ||
export(build_gwas) | ||
export(build_gwid) | ||
export(build_phase) | ||
export(case_control) | ||
export(extract) | ||
export(extract_window) | ||
export(fisher_test) | ||
export(gtest) | ||
export(haplotype_frequency) | ||
export(haplotype_structure) | ||
export(mcnemar_test) | ||
export(mcnemar_test_permut) | ||
export(permutation_test) | ||
export(print) | ||
export(roh) | ||
export(subset) | ||
import(Matrix) | ||
import(data.table) | ||
importFrom(piggyback,pb_download) | ||
importFrom(stats,fisher.test) | ||
importFrom(stats,quantile) | ||
importFrom(stats,xtabs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# gwid 0.1.0 | ||
|
||
# gwid 0.0.2 | ||
|
||
* Initial CRAN submission. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
#' Open a SNP GDS file and extract information. | ||
#' | ||
#' @param gds_data File name | ||
#' | ||
#' @param caco An object of class caco. Output of \code{case_control} function. | ||
#' @param gwas_generator logical; if \code{TRUE} an object of class result_snps | ||
#' will be saved inside output list. | ||
#' | ||
#' @return a list of seven objects; including smp.id, snp.id, snp.pos, smp.indx, | ||
#' smp.snp (a matrix with samples in rows and snp in columns), caco, | ||
#' snps(column sum of smp.snp for each case control) | ||
#' | ||
#' | ||
#' @export | ||
build_gwas <- function(gds_data = "name.gds", caco = "name.Rda", gwas_generator = TRUE) { | ||
if (missing(caco)) { | ||
stop(" provide case_control list object or case_control rda (contains list of case_control) file name ") | ||
} | ||
if (!is.list(caco)) { | ||
caco <- case_control(caco) | ||
} | ||
genoRA <- SNPRelate::snpgdsOpen(gds_data) | ||
smp.id <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "sample.id")) | ||
snp.id <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "snp.rs.id")) | ||
snp.pos <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "snp.position")) | ||
smp.indx <- which(smp.id %in% unique(unlist(caco))) | ||
smp.snp <- list() | ||
for (j in 1:length(caco)) { | ||
smp.snp[[j]] <- (gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "genotype"))[which(smp.id[smp.indx] %in% caco[[j]]), ]) | ||
rownames(smp.snp[[j]]) <- smp.id[which(smp.id[smp.indx] %in% caco[[j]])] | ||
colnames(smp.snp[[j]]) <- snp.id | ||
smp.snp[[j]][smp.snp[[j]] == 3] <- NA | ||
} | ||
names(smp.snp) <- names(caco) | ||
SNPRelate::snpgdsClose(genoRA) | ||
output <- list(smp.id = smp.id, snp.id = snp.id, snp.pos = snp.pos, smp.indx = smp.indx, smp.snp = smp.snp, caco = caco) | ||
class(output) <- "gwas" | ||
if (gwas_generator) { | ||
output$snps <- extract(output) | ||
} | ||
return(output) | ||
} | ||
|
||
|
||
#' Read .vcf structured text format files and reduce the size of file. | ||
#' | ||
#' @param phased_vcf A file name for a variant call format (vcf) file. | ||
#' | ||
#' @param caco An object of class caco. Output of \code{case_control} function. | ||
#' | ||
#' @return the output will be a a list of class phase contains two sparse matrix | ||
#' for each haplotype. | ||
#' | ||
#' @export | ||
build_phase <- function(phased_vcf = "name.vcf", caco) { | ||
if (missing(caco) || is.null(phased_vcf)) stop("case_control and 'phased vcf' are needed") | ||
phased <- vector(mode = "list", length = 2) | ||
# read only colnames (because of memory issues) | ||
tmp <- data.table::fread(phased_vcf, nrows = 0) | ||
# select those samples who are in caco (only relevant samples) | ||
# tmp2 <- data.table::fread(phased_vcf,select = which(colnames(tmp) %in% snp_smp$smp.id[snp_smp$smp.indx])) | ||
tmp2 <- data.table::fread(phased_vcf, select = which(colnames(tmp) %in% unique(unlist(caco)))) | ||
# find row and column index of 1's (zeros generate in sparse matrix) | ||
ind1 <- which(tmp2[, lapply(.SD, substr, 1, 1)] == "1", arr.ind = TRUE) | ||
ind2 <- which(tmp2[, lapply(.SD, substr, 3, 3)] == "1", arr.ind = TRUE) | ||
phased[[1]] <- Matrix::sparseMatrix(i = ind1[, 1], j = ind1[, 2], x = 1, dims = c(nrow(tmp2), ncol(tmp2))) | ||
phased[[2]] <- Matrix::sparseMatrix(i = ind2[, 1], j = ind2[, 2], x = 1, dims = c(nrow(tmp2), ncol(tmp2))) | ||
colnames(phased[[1]]) <- colnames(phased[[2]]) <- colnames(tmp2) | ||
names(phased) <- c("Hap.1", "Hap.2") | ||
class(phased) <- "phase" | ||
return(phased) | ||
} | ||
|
||
#' Open a ibd file and extract information. | ||
#' | ||
#' @param ibd_data a file name for output of \href{http://faculty.washington.edu/browning/refined-ibd.html}{Refined IBD} | ||
#' | ||
#' @param gwas object of class gwas | ||
#' @param gwid_generator logical; if \code{TRUE} an object of class result_snps | ||
#' will be saved inside output list. | ||
#' | ||
#' @return the output will be a object(list) of class gwid contains | ||
#' profile object, IBD object and result_snps object. | ||
#' | ||
#' @export | ||
build_gwid <- function(ibd_data = "name.ibd", gwas = "object of class gwas", gwid_generator = TRUE) { | ||
ibd <- data.table::fread(ibd_data) | ||
V1 <- V2 <- V3 <- V4 <- V5 <- V6 <- V7 <- V8 <- V9 <- NULL | ||
ibd <- ibd[V1 %in% unlist(unique(gwas[["caco"]])) & V3 %in% unlist(unique(gwas[["caco"]]))] | ||
class(ibd) <- append("IBD", class(ibd)) | ||
seq2 <- Vectorize(seq.default, vectorize.args = c("from", "to")) | ||
profile <- ind <- vector(mode = "list", length = length(gwas[["caco"]])) # list length 6 | ||
for (j in seq_along(gwas[["caco"]])) { | ||
ind[[j]] <- which(ibd$V1 %in% gwas[["caco"]][[j]] & ibd$V3 %in% gwas[["caco"]][[j]]) | ||
a1 <- ibd[V1 %in% gwas[["caco"]][[j]] & V3 %in% gwas[["caco"]][[j]]] | ||
a2 <- seq2(match(a1$V6, gwas$snp.pos), match(a1$V7, gwas$snp.pos)) | ||
Un1 <- unlist(a2) | ||
profile[[j]] <- Matrix::sparseMatrix( | ||
i = rep(seq_along(a2), lengths(a2)), | ||
j = Un1, | ||
x = 1 | ||
) | ||
if (ncol(profile[[j]]) < length(gwas$snp.pos)) { | ||
if (min(Un1) > 1) { | ||
mytemp <- Matrix(0, nrow = nrow(profile[[j]]), ncol = (min(Un1) - 1)) | ||
profile[[j]] <- cbind(mytemp, profile[[j]]) | ||
} | ||
|
||
if (max(Un1) < length(gwas$snp.pos)) { | ||
mytemp <- Matrix(0, nrow = nrow(profile[[j]]), ncol = (length(gwas$snp.pos) - max(Un1))) | ||
profile[[j]] <- cbind(profile[[j]], mytemp) | ||
} | ||
|
||
profile[[j]] <- methods::as(profile[[j]], "sparseMatrix") | ||
} | ||
} | ||
|
||
names(profile) <- names(ind) <- names(gwas[["caco"]]) | ||
|
||
class(profile) <- "profile" | ||
output <- list(profile = profile, IND = ind) | ||
output$snp_pos <- gwas[["snp.pos"]] | ||
output$ibd <- ibd | ||
class(output) <- "gwid" | ||
if (gwid_generator) { | ||
output$res <- extract(output) | ||
} | ||
return(output) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
csv2Rdat <- function(name = "", rep = 3) { | ||
if (!file.exists(name)) message("File doesn't exist") | ||
caco <- list() | ||
output <- list() | ||
input.data <- utils::read.csv(name) | ||
check.column <- function(v) { | ||
return(sum(c("1", "2") %in% as.integer(v))) | ||
} | ||
cacos <- names(which(apply(input.data, 2, check.column) == 2)) | ||
|
||
for (i in 1:length(cacos)) { | ||
caco$cases <- input.data$SampleName[which(input.data[, cacos[i]] == 2)] | ||
cont.ind <- which(input.data[, cacos[i]] == 1) | ||
if (length(caco$cases) * rep > length(cont.ind)) caco$cases <- sample(caco$cases, trunc(length(cont.ind) / rep)) | ||
conts <- sample(cont.ind, length(caco$cases) * rep) | ||
for (j in 1:rep) { | ||
caco[[paste0("cont", j)]] <- input.data$SampleName[conts[rep(1:rep, each = length(caco$cases)) == j]] | ||
} | ||
output[[cacos[i]]] <- caco | ||
|
||
} | ||
output | ||
} | ||
|
||
|
||
#' Reload saved case-control list file | ||
#' | ||
#' @param case_control_rda A character string giving the name of the case-control | ||
#' file to load. The file is a list of character vectors including subject names | ||
#' in each case-control groups or csv file including subject name for a disease. | ||
#' | ||
#' @param ... name of a column (disease name) of csv file. | ||
#' | ||
#' @return The output will be a list of character vectors include subject names | ||
#' and groups. The class of returned object is caco. | ||
#' | ||
#' @export | ||
case_control <- function(case_control_rda, ...) { | ||
if (missing(case_control_rda)) { | ||
stop("provide a case_control list (rda file)") | ||
} | ||
substrRight <- function(x, n) { | ||
substr(x, nchar(x) - n + 1, nchar(x)) | ||
} | ||
if (substrRight(case_control_rda,3) == "csv") { | ||
output <- csv2Rdat(case_control_rda)[[...]] | ||
class(output) <- "caco" | ||
return(output) | ||
} else { | ||
output <- get(load(case_control_rda)) | ||
class(output) <- "caco" | ||
return(output) | ||
} | ||
} |
Oops, something went wrong.