Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Soroush Mahmoudiandehkordi authored and cran-robot committed Dec 15, 2023
0 parents commit af77bc5
Show file tree
Hide file tree
Showing 62 changed files with 5,233 additions and 0 deletions.
33 changes: 33 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Package: gwid
Type: Package
Title: Genome-Wide Identity-by-Descent
Version: 0.1.0
Authors@R: c(
person("Soroush", "Mahmoudiandehkordi", email = "soroushmdg@gmail.com", role = c("aut", "cre")),
person("Mehdi", "Maadooliat", email = "mehdi.maadooliat@mu.edu", role = "aut"),
person("Steven", "Schrodi", email = "schrodi@wisc.edu", role = "aut")
)
Maintainer: Soroush Mahmoudiandehkordi <soroushmdg@gmail.com>
Description: Methods and tools for the analysis of Genome Wide
Identity-by-Descent ('gwid') mapping data, focusing on testing whether there
is a higher occurrence of Identity-By-Descent (IBD) segments around potential causal variants
in cases compared to controls, which is crucial for identifying rare
variants. To enhance its analytical power, 'gwid' incorporates a Sliding
Window Approach, allowing for the detection and analysis of signals from
multiple Single Nucleotide Polymorphisms (SNPs).
License: MIT + file LICENSE
Encoding: UTF-8
Imports: data.table, gdsfmt, SNPRelate, Matrix, ggplot2, plotly, utils,
stats, RcppRoll, methods, piggyback
RoxygenNote: 7.2.3
Suggests: knitr, magrittr, rmarkdown, testthat (>= 3.0.0)
Config/testthat/edition: 3
URL: https://github.com/soroushmdg/gwid
BugReports: https://github.com/soroushmdg/gwid/issues
NeedsCompilation: no
Packaged: 2023-12-14 14:00:32 UTC; soroush
Author: Soroush Mahmoudiandehkordi [aut, cre],
Mehdi Maadooliat [aut],
Steven Schrodi [aut]
Repository: CRAN
Date/Publication: 2023-12-14 16:40:08 UTC
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: gwid authors
61 changes: 61 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
2339c05f9e3c435f07321a4e762e8b52 *DESCRIPTION
a4f8c4ebcedf71886abfb0f38d00582f *LICENSE
7b30899cf199107d3f1fc47561c08705 *NAMESPACE
2c3b2daa370d785e4f6dbde49476479b *NEWS.md
6332b6ecfc4e7cc19c5da8fb2b1341d0 *R/build.R
64c942e25f6d91d81669aafd0061facb *R/case_control.R
740150a99e1b2efd66111d00740f5af6 *R/extract.R
9da2728f4963e16ec0c11d243f9e1816 *R/haplotype_structure.R
db170888f9cc8c1297063a10ceea3fa7 *R/hypothesis_test.R
c60fdb80ce7bf582b26759d7f4f307f2 *R/plot.R
f3b775e7d4b6e1c14ee69b5bf119447f *R/roh.R
7bafd82be04ae30ae5799949be0622f7 *R/utils.R
2240665d0af2cff75a432b3e9bc3a456 *README.md
d4bbb420e24462d9319a33b5531c5c4c *man/build_gwas.Rd
ee5e433c3343b0b232be4c386ca465fd *man/build_gwid.Rd
5dc565005a8dd793ec3b21804d1e428b *man/build_phase.Rd
c73a0350e14bec0bcc607e44b1b4d6bf *man/case_control.Rd
8cd64096bdf69a922f395522623aa7c3 *man/extract.Rd
746795c025f2ef6bc9621b35f57dec8c *man/extract.gwas.Rd
628e46a65fb21880f88e68ee5d25ac28 *man/extract.gwid.Rd
8c86fbe77bf43f0edd9232a55233a69b *man/extract_window.Rd
73fd0f0127b04dab671e23d7be1e3e41 *man/extract_window.gwid.Rd
6c0af988b8bea1fd105fc12cec041a5f *man/figures/README-example-1.png
732e993b474ae19fd0b7ce2ef1d90ef0 *man/figures/README-example-2.png
32cb276b6dcaf4f2afe9e06868a42cfa *man/figures/README-pressure-1.png
195b676278ac65fd468b45b0b52c0f98 *man/figures/README-unnamed-chunk-4-1.png
508a19cb0d595555d3c618c4601ce586 *man/figures/README-unnamed-chunk-4-2.png
802018f087159e0bba40cf7a3f7aa562 *man/figures/README-unnamed-chunk-5-1.png
3f5983f80814997f046474978b4ef880 *man/figures/final-copy-arrow.png
a4b18d3f03e016f5d59a57ac951d62f2 *man/figures/hap_version1.png
80ac1f8a4539bba99891dc76bad27be6 *man/fisher_test.Rd
2170249144341d373f5973a793c09a35 *man/fisher_test.gwas.Rd
1f687f8710bb3a7874dedb77da38f9ff *man/fisher_test.gwid.Rd
0cb0bd11b4219d9b93e9c7d3bf759836 *man/fisher_test.result_snps.Rd
e1a9c01cb8ae9f13ba1b79889aa9eb0e *man/gtest.Rd
48aaa27ffa2d3a1e584d506434ada59f *man/gtest.haplotype_structure.Rd
bf58970bbf0957697a48eccdd128bace *man/haplotype_frequency.Rd
ad7ceda200a69d0a9a4a80bfcab648ac *man/haplotype_frequency.haplotype_structure.Rd
5c3dea5a88221fccc41b522535bac879 *man/haplotype_structure.Rd
6712c553404cc28c0994c1f9d79277b0 *man/haplotype_structure.gwas.Rd
08ae515453217c6ead1ac9fb54934eed *man/haplotype_structure.gwid.Rd
ed4175daae00ef492c3dd2b3b0deee91 *man/mcnemar_test.Rd
d1d21173d35a9462d5348f41dd5117b5 *man/mcnemar_test.result_snps.Rd
3c6d51be7b3122bbbce39b41d73b27f6 *man/mcnemar_test_permut.Rd
7895d66099567bf491a0fa33051d047b *man/mcnemar_test_permut.result_snps.Rd
6111173128a04fc7897e991267425de7 *man/permutation_test.Rd
5d4652c85313b816dbfcc857dbe41e7a *man/permutation_test.gwas.Rd
6c481fb77f8c389ecf2a18577f133828 *man/permutation_test.gwid.Rd
9bb38a92a315d39c6c8be63c5f1f6955 *man/permutation_test.haplotype_structure.Rd
133b060317998c060c07c3c285942670 *man/plot.gwas.Rd
81ab70e931c0c38d2b4e50d406c1722c *man/plot.gwid.Rd
1f4f27db65a8adbee93d4517ed3bfe64 *man/plot.haplotype_frequency.Rd
129fbbfce42dc62d88638aa6dbab9209 *man/plot.haplotype_structure_frequency.Rd
7d1cf05ccfed954b4bfe07c94d4f0dd4 *man/plot.result_snps.Rd
d6c8dfb4a82c4bb88e90acbd49454229 *man/plot.test_snps.Rd
415498f77efa020ce443c0a18f86db30 *man/print.Rd
220c303504c370aaaa55efd2469aea31 *man/print.gwas.Rd
c39de9a46e35312c0bb7c2f19ef2e487 *man/roh.Rd
57ac3a33d8c7c9984cf155bd6a303742 *man/roh.phase.Rd
5ca6eeeded4ff58448c72bb37d092b89 *man/subset.Rd
a688861e104fd57dd6e6d4edaff1f11d *man/subset.gwid.Rd
48 changes: 48 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Generated by roxygen2: do not edit by hand

S3method(extract,gwas)
S3method(extract,gwid)
S3method(extract_window,gwid)
S3method(fisher_test,gwas)
S3method(fisher_test,gwid)
S3method(fisher_test,result_snps)
S3method(gtest,haplotype_structure)
S3method(haplotype_frequency,haplotype_structure)
S3method(haplotype_structure,gwas)
S3method(haplotype_structure,gwid)
S3method(mcnemar_test,result_snps)
S3method(mcnemar_test_permut,result_snps)
S3method(permutation_test,gwas)
S3method(permutation_test,gwid)
S3method(permutation_test,haplotype_structure)
S3method(plot,gwas)
S3method(plot,gwid)
S3method(plot,haplotype_frequency)
S3method(plot,haplotype_structure_frequency)
S3method(plot,result_snps)
S3method(plot,test_snps)
S3method(print,gwas)
S3method(roh,phase)
S3method(subset,gwid)
export(build_gwas)
export(build_gwid)
export(build_phase)
export(case_control)
export(extract)
export(extract_window)
export(fisher_test)
export(gtest)
export(haplotype_frequency)
export(haplotype_structure)
export(mcnemar_test)
export(mcnemar_test_permut)
export(permutation_test)
export(print)
export(roh)
export(subset)
import(Matrix)
import(data.table)
importFrom(piggyback,pb_download)
importFrom(stats,fisher.test)
importFrom(stats,quantile)
importFrom(stats,xtabs)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# gwid 0.1.0

# gwid 0.0.2

* Initial CRAN submission.
129 changes: 129 additions & 0 deletions R/build.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#' Open a SNP GDS file and extract information.
#'
#' @param gds_data File name
#'
#' @param caco An object of class caco. Output of \code{case_control} function.
#' @param gwas_generator logical; if \code{TRUE} an object of class result_snps
#' will be saved inside output list.
#'
#' @return a list of seven objects; including smp.id, snp.id, snp.pos, smp.indx,
#' smp.snp (a matrix with samples in rows and snp in columns), caco,
#' snps(column sum of smp.snp for each case control)
#'
#'
#' @export
build_gwas <- function(gds_data = "name.gds", caco = "name.Rda", gwas_generator = TRUE) {
if (missing(caco)) {
stop(" provide case_control list object or case_control rda (contains list of case_control) file name ")
}
if (!is.list(caco)) {
caco <- case_control(caco)
}
genoRA <- SNPRelate::snpgdsOpen(gds_data)
smp.id <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "sample.id"))
snp.id <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "snp.rs.id"))
snp.pos <- gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "snp.position"))
smp.indx <- which(smp.id %in% unique(unlist(caco)))
smp.snp <- list()
for (j in 1:length(caco)) {
smp.snp[[j]] <- (gdsfmt::read.gdsn(gdsfmt::index.gdsn(genoRA, "genotype"))[which(smp.id[smp.indx] %in% caco[[j]]), ])
rownames(smp.snp[[j]]) <- smp.id[which(smp.id[smp.indx] %in% caco[[j]])]
colnames(smp.snp[[j]]) <- snp.id
smp.snp[[j]][smp.snp[[j]] == 3] <- NA
}
names(smp.snp) <- names(caco)
SNPRelate::snpgdsClose(genoRA)
output <- list(smp.id = smp.id, snp.id = snp.id, snp.pos = snp.pos, smp.indx = smp.indx, smp.snp = smp.snp, caco = caco)
class(output) <- "gwas"
if (gwas_generator) {
output$snps <- extract(output)
}
return(output)
}


#' Read .vcf structured text format files and reduce the size of file.
#'
#' @param phased_vcf A file name for a variant call format (vcf) file.
#'
#' @param caco An object of class caco. Output of \code{case_control} function.
#'
#' @return the output will be a a list of class phase contains two sparse matrix
#' for each haplotype.
#'
#' @export
build_phase <- function(phased_vcf = "name.vcf", caco) {
if (missing(caco) || is.null(phased_vcf)) stop("case_control and 'phased vcf' are needed")
phased <- vector(mode = "list", length = 2)
# read only colnames (because of memory issues)
tmp <- data.table::fread(phased_vcf, nrows = 0)
# select those samples who are in caco (only relevant samples)
# tmp2 <- data.table::fread(phased_vcf,select = which(colnames(tmp) %in% snp_smp$smp.id[snp_smp$smp.indx]))
tmp2 <- data.table::fread(phased_vcf, select = which(colnames(tmp) %in% unique(unlist(caco))))
# find row and column index of 1's (zeros generate in sparse matrix)
ind1 <- which(tmp2[, lapply(.SD, substr, 1, 1)] == "1", arr.ind = TRUE)
ind2 <- which(tmp2[, lapply(.SD, substr, 3, 3)] == "1", arr.ind = TRUE)
phased[[1]] <- Matrix::sparseMatrix(i = ind1[, 1], j = ind1[, 2], x = 1, dims = c(nrow(tmp2), ncol(tmp2)))
phased[[2]] <- Matrix::sparseMatrix(i = ind2[, 1], j = ind2[, 2], x = 1, dims = c(nrow(tmp2), ncol(tmp2)))
colnames(phased[[1]]) <- colnames(phased[[2]]) <- colnames(tmp2)
names(phased) <- c("Hap.1", "Hap.2")
class(phased) <- "phase"
return(phased)
}

#' Open a ibd file and extract information.
#'
#' @param ibd_data a file name for output of \href{http://faculty.washington.edu/browning/refined-ibd.html}{Refined IBD}
#'
#' @param gwas object of class gwas
#' @param gwid_generator logical; if \code{TRUE} an object of class result_snps
#' will be saved inside output list.
#'
#' @return the output will be a object(list) of class gwid contains
#' profile object, IBD object and result_snps object.
#'
#' @export
build_gwid <- function(ibd_data = "name.ibd", gwas = "object of class gwas", gwid_generator = TRUE) {
ibd <- data.table::fread(ibd_data)
V1 <- V2 <- V3 <- V4 <- V5 <- V6 <- V7 <- V8 <- V9 <- NULL
ibd <- ibd[V1 %in% unlist(unique(gwas[["caco"]])) & V3 %in% unlist(unique(gwas[["caco"]]))]
class(ibd) <- append("IBD", class(ibd))
seq2 <- Vectorize(seq.default, vectorize.args = c("from", "to"))
profile <- ind <- vector(mode = "list", length = length(gwas[["caco"]])) # list length 6
for (j in seq_along(gwas[["caco"]])) {
ind[[j]] <- which(ibd$V1 %in% gwas[["caco"]][[j]] & ibd$V3 %in% gwas[["caco"]][[j]])
a1 <- ibd[V1 %in% gwas[["caco"]][[j]] & V3 %in% gwas[["caco"]][[j]]]
a2 <- seq2(match(a1$V6, gwas$snp.pos), match(a1$V7, gwas$snp.pos))
Un1 <- unlist(a2)
profile[[j]] <- Matrix::sparseMatrix(
i = rep(seq_along(a2), lengths(a2)),
j = Un1,
x = 1
)
if (ncol(profile[[j]]) < length(gwas$snp.pos)) {
if (min(Un1) > 1) {
mytemp <- Matrix(0, nrow = nrow(profile[[j]]), ncol = (min(Un1) - 1))
profile[[j]] <- cbind(mytemp, profile[[j]])
}

if (max(Un1) < length(gwas$snp.pos)) {
mytemp <- Matrix(0, nrow = nrow(profile[[j]]), ncol = (length(gwas$snp.pos) - max(Un1)))
profile[[j]] <- cbind(profile[[j]], mytemp)
}

profile[[j]] <- methods::as(profile[[j]], "sparseMatrix")
}
}

names(profile) <- names(ind) <- names(gwas[["caco"]])

class(profile) <- "profile"
output <- list(profile = profile, IND = ind)
output$snp_pos <- gwas[["snp.pos"]]
output$ibd <- ibd
class(output) <- "gwid"
if (gwid_generator) {
output$res <- extract(output)
}
return(output)
}
54 changes: 54 additions & 0 deletions R/case_control.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
csv2Rdat <- function(name = "", rep = 3) {
if (!file.exists(name)) message("File doesn't exist")
caco <- list()
output <- list()
input.data <- utils::read.csv(name)
check.column <- function(v) {
return(sum(c("1", "2") %in% as.integer(v)))
}
cacos <- names(which(apply(input.data, 2, check.column) == 2))

for (i in 1:length(cacos)) {
caco$cases <- input.data$SampleName[which(input.data[, cacos[i]] == 2)]
cont.ind <- which(input.data[, cacos[i]] == 1)
if (length(caco$cases) * rep > length(cont.ind)) caco$cases <- sample(caco$cases, trunc(length(cont.ind) / rep))
conts <- sample(cont.ind, length(caco$cases) * rep)
for (j in 1:rep) {
caco[[paste0("cont", j)]] <- input.data$SampleName[conts[rep(1:rep, each = length(caco$cases)) == j]]
}
output[[cacos[i]]] <- caco

}
output
}


#' Reload saved case-control list file
#'
#' @param case_control_rda A character string giving the name of the case-control
#' file to load. The file is a list of character vectors including subject names
#' in each case-control groups or csv file including subject name for a disease.
#'
#' @param ... name of a column (disease name) of csv file.
#'
#' @return The output will be a list of character vectors include subject names
#' and groups. The class of returned object is caco.
#'
#' @export
case_control <- function(case_control_rda, ...) {
if (missing(case_control_rda)) {
stop("provide a case_control list (rda file)")
}
substrRight <- function(x, n) {
substr(x, nchar(x) - n + 1, nchar(x))
}
if (substrRight(case_control_rda,3) == "csv") {
output <- csv2Rdat(case_control_rda)[[...]]
class(output) <- "caco"
return(output)
} else {
output <- get(load(case_control_rda))
class(output) <- "caco"
return(output)
}
}

0 comments on commit af77bc5

Please sign in to comment.