Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Nahuel Garcia authored and cran-robot committed Mar 26, 2023
0 parents commit 03c4f82
Show file tree
Hide file tree
Showing 13 changed files with 624 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Package: simulMGF
Type: Package
Title: Simulate SNP Matrix, Phenotype and Genotypic Effects
Version: 0.1.0
Author: Martin Nahuel Garcia [aut, cre]
Maintainer: Martin Nahuel Garcia <garcia.martin@inta.gob.ar>
Description: Simulate genotypes in SNP (single nucleotide polymorphisms) Matrix as random numbers from an uniform distribution, for diploid organisms (coded by 0, 1, 2), Sikorska et al., (2013) <doi:10.1186/1471-2105-14-166>, or half-sib/full-sib SNP matrix from real or simulated parents SNP data, assuming mendelian segregation. Simulate phenotypic traits for real or simulated SNP data, controlled by a specific number of quantitative trait loci and their effects, sampled from a Normal or an Uniform distributions, assuming a pure additive model. This is useful for testing association and genomic prediction models or for educational purposes.
License: MIT + file LICENSE
Encoding: UTF-8
URL: https://github.com/mngar/simulMGF
RoxygenNote: 7.2.3
Suggests: spelling
Language: en-US
NeedsCompilation: no
Packaged: 2023-03-24 18:24:02 UTC; Martin
Repository: CRAN
Date/Publication: 2023-03-26 19:20:02 UTC
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: Martin Nahuel Garcia
12 changes: 12 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
e29f41b3635d6f4f2d15a92c41daac70 *DESCRIPTION
e57feaa59b02e2e9026cd9fd6b9d4bb0 *LICENSE
0a200adc8499c067712ced39581a2723 *NAMESPACE
62da970f2e38ee1eda76175b38f2cb66 *R/script_simulMGF.R
0ea0b536a649e8262825283c2fda8451 *README.md
b4a8d47ee8674fa56c787d88c51a9dee *man/simGeno.Rd
facd1d24d934399673fa32705c9d2448 *man/simPheno.Rd
8fd6a7ed10dc031b83e3f6a8028a7bd0 *man/simulFS.Rd
8ece08e404c0fc13ae74eed9ee205aa4 *man/simulHS.Rd
5bb5901f554d28b2573363803d7c14c5 *man/simulN.Rd
d501b9a52cbfd98a020057eafdcfe245 *man/simulU.Rd
332173f20d20942c02019350ea4dcadc *tests/spelling.R
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
exportPattern("^[[:alpha:]]+")
importFrom("stats", "rnorm", "runif")
284 changes: 284 additions & 0 deletions R/script_simulMGF.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
#simG <- simP <- simulatedFS <- simulatedHS <- nsimout <- usimout <- NULL
#' Function to simulate a random SNP matrix, phenotype and QTLs with effects sampled from a Normal distribution
#'
#' @param Nind number of individuals to simulate
#' @param Nmarkers number of SNP markers to generate
#' @param Nqtl number of QTLs to simulate
#' @param Esigma standard deviation of effects with distribution N~(0,Esigma^2)
#' @param Pmean phenotype mean
#' @param Perror standard deviation of error (portion of phenotype not explained by genomic information)
#'
#' @return a list with 4 elements
#' @export
#'
#' @examples
#' set.seed(123)
#' simulN(100, 1000, 50, .9, 12, .5)
#' #[1] "nsimout was generated"
#' str(nsimout)
#' #List of 4
#' #$ geno : num [1:100, 1:1000] 0 2 1 2 2 0 1 2 1 1 ...
#' #$ pheno : num [1:100, 1] 25.4 21.6 16 13.8 19.4 ...
#' #$ QTN : int [1:50] 568 474 529 349 45 732 416 51 413 514 ...
#' #$ Meffects: num [1:50] 0.2696 -0.1552 1.0192 0.0209 1.2023 ...
simulN <- function(Nind, Nmarkers, Nqtl, Esigma, Pmean, Perror){
#genotype simulation
x <- matrix(NA, Nind, Nmarkers)
for(i in 1:Nmarkers) {
x[,i] <- round(runif(Nind,-0.5,2.5))
}
#SNPs associated (random)
QTN <- sample(1:Nmarkers, Nqtl, replace=F)
#SNP effects from uniform distribution
Neffects <- rnorm(Nqtl,0,Esigma)
#portion of phenotype not explained by markers
error <- rnorm(Nind,0,Perror)
#phenotype simulation
for(i in 1:Nqtl)
{Term <- x[,QTN[i]]*Neffects[i]
assign(paste0("Term", i), Term)
}
t2 <- 0
for(i in 1:Nqtl)
{t1 <- get(paste0("Term", i))
t2 <- t1+t2
}
y <- Pmean+t2+error
dim(y) <- c(length(y), 1)
genphen <- list(geno = x, pheno = y, QTN = QTN, Meffects = Neffects)
nsimout <- NULL
nsimout <<- genphen
return("nsimout was generated")
}

#' Function to simulate half sib progeny genotypes from the genotype of one parent
#'
#' Simulate half sib progeny from one genotyped parent assuming a random genotype for the other parental. We assume that these are diploid organisms.
#'
#' @param x genotype matrix of a set of moms
#' @param Nprogeny number of progeny's genotypes to simulate from each mom
#'
#' @return a matrix of dimensions (nrow(x)*Nprogeny) x ncol(x)
#' @export
#'
#' @examples
#' #' #simulate 100 individuals and 1000 SNPs
#' set.seed(123)
#' simGeno(100, 1000)
#' #[1] "simG was generated"
#' #simulate the genotype of 3 sets 5 HS (one set by mom)
#' simulHS(simG[1:3,],5)
#' #[1] "simulatedHS was generated"
#' dim(simulatedHS)
#' #[1] 15 1000
simulHS <- function(x, Nprogeny){
#progeny genomic matrix based on mendelian segregation
newx <- matrix(NA,nrow(x)*Nprogeny,ncol(x))
g0 <- c(0,1)
g1 <- c(0,1,1,2)
g2 <- c(1,2)
for(i in 1:nrow(x)){
for(j in 1:Nprogeny){
for(m in 1:ncol(x)){
if(x[i,m] == 0){newx[i+nrow(x)*(j-1),m]=sample(g0,1,replace=T)
} else if(x[i,m] == 1){newx[i+nrow(x)*(j-1),m]=sample(g1,1,replace=T)
} else if(x[i,m] == 2){newx[i+nrow(x)*(j-1),m]=sample(g2,1,replace=T)
} else if(is.na(x[i,m])){newx[i+nrow(x)*(j-1),m]=="NA"
}
}
}
}
simulatedHS <- NULL
simulatedHS <<- newx
return("simulatedHS was generated")
}

#' Function to simulate full sib
#'
#' Simulate full sib progeny genotypes from the genotype of parentals (matrixes with the same dimensions). Pair of parents mating will be in the order of the matrixes. We assume that these are diploid organisms.
#'
#' @param x genotype matrix of a set of moms
#' @param y genotype matrix of a set of dads
#' @param Nprogeny number of progeny's genotypes to generate from each pair of parents
#'
#' @return a matrix of dimensions (nrow(x)*Nprogeny) x ncol(x)
#' @export
#'
#' @examples
#' #simulate 100 individuals and 1000 SNPs
#' set.seed(123)
#' simGeno(100, 1000)
#' #[1] "simG was generated"
#' #simulate the genotype of 5 FS from 3 pairs of parents
#' simulFS(simG[1:3,],simG[4:6,],5)
#' #[1] "simulatedFS was generated"
#' dim(simulatedFS)
#' #[1] 15 1000
#' # The first 5 individuals are progeny of mom 1 and dad 1, the second 5 individuals are progeny of mom 2 and dad 2, and so on.
simulFS <- function(x, y, Nprogeny){
#progeny genomic matrix based on mendelian segregation
newy <- matrix(NA,nrow(x)*Nprogeny,ncol(x))
g0 <- c(0,1)
g1 <- c(0,1,1,2)
g2 <- c(1,2)
for(i in 1:nrow(x)){
for(j in 1:Nprogeny){
for(m in 1:ncol(x)){
if(x[i,m] == 0 && y[i,m] == 0){newy[i+nrow(x)*(j-1),m]=0
} else if(x[i,m] == 1 && y[i,m] == 0){newy[i+nrow(x)*(j-1),m]=sample(g0,1,replace=T)
} else if(x[i,m] == 2 && y[i,m] == 0){newy[i+nrow(x)*(j-1),m]=1
} else if(x[i,m] == 0 && y[i,m] == 1){newy[i+nrow(x)*(j-1),m]=sample(g0,1,replace=T)
} else if(x[i,m] == 1 && y[i,m] == 1){newy[i+nrow(x)*(j-1),m]=sample(g1,1,replace=T)
} else if(x[i,m] == 2 && y[i,m] == 1){newy[i+nrow(x)*(j-1),m]=sample(g2,1,replace=T)
} else if(x[i,m] == 0 && y[i,m] == 2){newy[i+nrow(x)*(j-1),m]=1
} else if(x[i,m] == 1 && y[i,m] == 2){newy[i+nrow(x)*(j-1),m]=sample(g2,1,replace=T)
} else if(x[i,m] == 2 && y[i,m] == 2){newy[i+nrow(x)*(j-1),m]=2
} else if(is.na(x[i,m]) || is.na(y[i,m])){newy[i+nrow(x)*(j-1),m]=="NA"
}
}
}
}
simulatedFS <- NULL
simulatedFS <<- newy
return("simulatedFS was generated")
}

#' Function to simulate phenotypes
#'
#' Simulate a phenotype from a genotype matrix with QTLs with random effects sampled from a Normal distribution
#'
#' @param x SNP matrix coded like 0 homozygote; 1 heterozygote; 2 homozygote
#' @param Nqtl number of QTLs to simulate
#' @param Esigma standard deviation of effects with distribution N~(0,Esigma^2)
#' @param Pmean phenotype mean
#' @param Perror standard deviation of error (portion of phenotype not explained by genomic information)
#'
#' @return An object of class list containing the trait, the markers associated and their effects.
#' @export
#'
#' @examples
#' set.seed(123)
#' simGeno(100, 1000)
#' #' #[1] "simG was generated"
#' simPheno(simG, 50, .8, 12, .5)
#' #[1] "simP was generated"
#' str(simP)
#' #List of 3
#' #$ pheno : num [1:100, 1] 24 20.5 15.6 13.6 18.5 ...
#' #$ QTN : int [1:50] 568 474 529 349 45 732 416 51 413 514 ...
#' #$ Meffects: num [1:50] 0.2396 -0.138 0.906 0.0186 1.0687 ...
simPheno <- function(x, Nqtl, Esigma, Pmean, Perror){
#genotype data
Nind <- nrow(x)
Nmarkers <- ncol(x)
#SNPs associated (random)
QTN <- sample(1:Nmarkers, Nqtl, replace=F)
#SNP effects from uniform distribution
Neffects <- rnorm(Nqtl,0,Esigma)
#portion of phenotype not explained by markers
error <- rnorm(Nind,0,Perror)
#phenotype simulation
for(i in 1:Nqtl)
{Term <- x[,QTN[i]]*Neffects[i]
assign(paste0("Term", i), Term)
}
t2 <- 0
for(i in 1:Nqtl)
{t1 <- get(paste0("Term", i))
t2 <- t1+t2
}
y <- Pmean+t2+error
dim(y) <- c(length(y), 1)
pheno <- list(pheno = y, QTN = QTN, Meffects = Neffects)
simP <- NULL
simP <<- pheno
return("simP was generated")
}

#' Function to simulate SNP matrix
#'
#' Simulate SNP matrix coded 0, 1 and 2; with random genotypes
#'
#' @param Nind number of individuals to simulate
#' @param Nmarkers number of SNP markers to generate
#'
#' @return a matrix of dimensions Nind x Nmarkers
#' @export
#'
#' @examples
#' #simulate 100 individuals and 1000 SNPs
#' set.seed(123)
#' simGeno(100, 1000)
#' #[1] "simG was generated"
#' dim(simG);simG[1:5,1:5]
#' #[1] 100 1000
#' #[,1] [,2] [,3] [,4] [,5]
#' #[1,] 0 1 0 2 2
#' #[2,] 2 0 2 0 0
#' #[3,] 1 1 1 2 2
#' #[4,] 2 2 1 2 1
#' #[5,] 2 1 1 1 1
simGeno <- function(Nind, Nmarkers){
#genotype simulation
x <- matrix(NA, Nind, Nmarkers)
for(i in 1:Nmarkers) {
x[,i] <- round(runif(Nind,-0.5,2.5))
}
geno <- x
simG <- NULL
simG <<- geno
return("simG was generated")
}

#' Function to simulate a random SNP matrix, phenotype and QTLs with effects sampled from a Uniform distribution
#'
#' @param Nind number of individuals to simulate
#' @param Nmarkers number of SNP markers to generate
#' @param Nqtl number of QTLs to simulate
#' @param Pmean phenotype mean
#' @param Perror standard deviation of error (portion of phenotype not explained by genomic information)
#'
#' @return a list with 4 elements
#' @export
#'
#' @examples
#' set.seed(123)
#' simulU(100, 1000, 50, 12, .5)
#' #[1] "usimout was generated"
#' str(usimout)
#' #List of 4
#' #$ geno : num [1:100, 1:1000] 0 2 1 2 2 0 1 2 1 1 ...
#' #$ pheno : num [1:100, 1] 10.3 14.7 11.8 10.2 13.1 ...
#' #$ QTN : int [1:50] 568 474 529 349 45 732 416 51 413 514 ...
#' #$ Meffects: num [1:50] 0.2355 0.0158 -0.1369 -0.1246 0.7426 ...
simulU <- function(Nind, Nmarkers, Nqtl, Pmean, Perror){
#genotype simulation
x <- matrix(NA, Nind, Nmarkers)
for(i in 1:Nmarkers) {
x[,i] <- round(runif(Nind,-0.5,2.5))
}
#SNPs associated (random)
QTN <- sample(1:Nmarkers, Nqtl, replace=F)
#SNP effects from uniform distribution
Ueffects <- runif(Nqtl, -1, 1)
#portion of phenotype not explained by markers
error <- rnorm(Nind,0,Perror)
#phenotype simulation
for(i in 1:Nqtl)
{Term <- x[,QTN[i]]*Ueffects[i]
assign(paste0("Term", i), Term)
}
t2 <- 0
for(i in 1:Nqtl)
{t1 <- get(paste0("Term", i))
t2 <- t1+t2
}
y <- Pmean+t2+error
dim(y) <- c(length(y), 1)
genphen <- list(geno = x, pheno = y, QTN = QTN, Meffects = Ueffects)
usimout <- NULL
usimout <<- genphen
return("usimout was generated")
}


2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# simulMGF
Package to simulate a random SNP Matrix for diploid organisms (coded by 0, 1, 2) or half sib/full sib SNP matrix from real or simulated parents SNP data. Simulate phenotypic traits for real or simulated SNP data, controlled by a specific number of QTLs and their effects, sampled from a Normal or a Uniform distributions.
42 changes: 42 additions & 0 deletions man/simGeno.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
\name{simGeno}
\alias{simGeno}
\title{
Function to simulate SNP matrix
}
\description{
Simulate SNP matrix coded 0, 1 and 2; with random genotypes.
}
\usage{
simGeno(Nind, Nmarkers)
}
\arguments{
\item{Nind}{
number of individuals to simulate.
}
\item{Nmarkers}{
Nmarkers number of SNP markers to generate.
}
}
\value{
a matrix of dimensions Nind x Nmarkers.
}
\references{
Wu, R., Ma, C., & Casella, G. (2007). Statistical genetics of quantitative traits: linkage, maps and QTL. Springer Science & Business Media.
}
\author{
Martin Nahuel Garcia <orcid:0000-0001-5760-986X>
}
\examples{
#simulate 100 individuals and 1000 SNPs
set.seed(123)
simGeno(100, 1000)
#[1] "simG was generated"
dim(simG);simG[1:5,1:5]
#[1] 100 1000
#[,1] [,2] [,3] [,4] [,5]
#[1,] 0 1 0 2 2
#[2,] 2 0 2 0 0
#[3,] 1 1 1 2 2
#[4,] 2 2 1 2 1
#[5,] 2 1 1 1 1
}

0 comments on commit 03c4f82

Please sign in to comment.