Skip to content

Commit

Permalink
version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
flh3 authored and gaborcsardi committed May 7, 2014
0 parents commit cb24136
Show file tree
Hide file tree
Showing 12 changed files with 367 additions and 0 deletions.
14 changes: 14 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,14 @@
Package: gendata
Type: Package
Title: Generate and modify synthetic datasets
Version: 1.0
Date: 2014-05-07
Author: Francis Huang <flh3@hotmail.com>
Maintainer: Francis Huang <flh3@hotmail.com>
Description: Set of functions to create datasets using a correlation matrix.
License: GPL-3
Suggests: psych
Packaged: 2014-05-08 11:52:56 UTC; huangf
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2014-05-08 15:00:04
11 changes: 11 additions & 0 deletions MD5
@@ -0,0 +1,11 @@
7195afc887c32881466e57af6b4ed44d *DESCRIPTION
df390c53434517b304ac5db487184641 *NAMESPACE
342f1ce5c35aeb3cf31f40751e358665 *R/dtrans.R
f4f8cedfa2020bf348b967e67aba1b87 *R/genmvnorm.R
01ef3ab6e2a122c3c91d6870bc17a2e6 *R/recalib.R
4cbd5ff37108a8c4cd99a19946511f89 *R/revcode.R
67874b9ff91c1fd314102a5dc00cf566 *man/dtrans.Rd
7ad81a8b5b71270c0033a82d41998a13 *man/gendata-package.Rd
63f2bee019b17a527c52d9a5d84d6862 *man/genmvnorm.Rd
0e7981599f1f44e390fb28f645fde363 *man/recalib.Rd
72fe602011db9615eb043cb9a50193b6 *man/revcode.Rd
1 change: 1 addition & 0 deletions NAMESPACE
@@ -0,0 +1 @@
# Export every object whose name begins with one or more alphabetic characters.
exportPattern("^[[:alpha:]]+")
21 changes: 21 additions & 0 deletions R/dtrans.R
@@ -0,0 +1,21 @@
# Transform a dataset to specified means and standard deviations.
#
# Every column i of `data` is replaced by m[i] + sd[i] * column. The result
# therefore has the requested mean/SD when the input columns are standardized
# (z scores).
#
# Arguments:
#   data  data frame or matrix; each column is one variable
#   m     vector of target means, one per column
#   sd    vector of target standard deviations, one per column
#   rnd   if TRUE, the transformed values are rounded to whole numbers
#         (the default is written as `F` to stay in sync with the man page)
#
# Returns `data` with every column rescaled.
# Stops if the number of means/SDs differs from the number of columns, or if
# any column holds a single constant value.
dtrans <- function(data, m, sd, rnd = F) {
  n_vars <- ncol(data)

  # Basic checking: report what was supplied, then validate the lengths.
  cat("Number of variables in dataset:", n_vars, "\n")
  cat("Number of means specified:", length(m), "\n")
  cat("Number of standard deviations in dataset:", length(sd), "\n")
  if (length(m) != length(sd)) {
    stop("Number of means/SDs should match.")
  }
  if (length(m) != n_vars) {
    stop("Incorrect number of means/SDs")
  }

  # Check every column (not just the first one) for a constant value before
  # anything is modified. Missing values are ignored for this check.
  for (i in seq_len(n_vars)) {
    col_range <- range(data[, i], na.rm = TRUE)
    if (isTRUE(col_range[1] == col_range[2])) {
      stop("Constant value detected")
    }
  }

  # Transforming. `data[, i]` addresses a whole column for both data frames
  # and matrices, whereas `data[i]` would hit a single cell of a matrix.
  for (i in seq_len(n_vars)) {
    data[, i] <- m[i] + sd[i] * data[, i]
  }

  if (isTRUE(as.logical(rnd))) {
    round(data, 0)
  } else {
    data
  }
}
35 changes: 35 additions & 0 deletions R/genmvnorm.R
@@ -0,0 +1,35 @@

####################
# Generate a synthetic dataset from a correlation structure.
#
# Arguments:
#   cor   either a k x k correlation matrix, or a plain vector holding the
#         k * (k - 1) / 2 elements of its lower triangle, column by column
#   k     number of variables
#   n     number of observations to generate
#   seed  optional seed passed to set.seed(); FALSE (the default, written as
#         `F` to stay in sync with the man page) or NULL leaves the random
#         number generator untouched
#
# Returns a data frame with n rows and k columns, obtained by multiplying
# independent standard normal draws by the component loadings of `cor`.
# Stops if a vector `cor` has the wrong length or if the 'psych' package is
# not installed.
genmvnorm <- function(cor, k, n, seed = F) {
  # Validate and expand the input first, so bad arguments fail fast.
  if (!is.matrix(cor)) {
    if (length(cor) != (k * (k - 1) / 2)) {
      stop("STOP: wrong correlation table")
    }
    cr_cor <- matrix(NA, k, k)
    diag(cr_cor) <- 1
    cr_cor[lower.tri(cr_cor)] <- cor
    # Mirror the lower triangle into the upper triangle.
    cr_cor[upper.tri(cr_cor)] <- t(cr_cor)[upper.tri(cr_cor)]
    cor <- cr_cor
  }

  # 'psych' is an optional (suggested) dependency: fail with a clear message
  # instead of installing software as a side effect of a function call.
  if (!requireNamespace("psych", quietly = TRUE)) {
    stop("Package 'psych' is required by genmvnorm(); ",
         "install it with install.packages(\"psych\")", call. = FALSE)
  }

  # identical() instead of `seed != F`, so that seed = 0 is honoured and a
  # NULL seed does not raise an error.
  if (!is.null(seed) && !identical(seed, FALSE)) {
    set.seed(seed)
  }

  # NOTE(review): `rotate = FALSE` is kept exactly as in the original call so
  # that seeded results stay unchanged -- confirm against the psych docs
  # whether "none" is the intended value.
  fit <- psych::principal(cor, rotate = FALSE, nfactors = k)
  loadings <- matrix(fit$loadings[seq_len(k), seq_len(k)], nrow = k)

  # One batch of n standard normal draws per variable; the draw order is kept
  # as-is so that a given seed reproduces the same dataset.
  draws <- matrix(nrow = n, ncol = k)
  for (i in seq_len(k)) {
    draws[, i] <- rnorm(n)
  }

  data.frame(t(loadings %*% t(draws)))
}
13 changes: 13 additions & 0 deletions R/recalib.R
@@ -0,0 +1,13 @@
# Rescale one variable linearly to a new minimum and maximum.
#
# Arguments:
#   data  the dataset (data frame or matrix)
#   var   column number or column name of the variable to rescale
#   low   new minimum value
#   high  new maximum value (if low > high the two are swapped)
#
# Returns `data` with column `var` mapped so that its smallest observed value
# becomes the new minimum and its largest the new maximum. Missing values are
# ignored when finding the current range and stay missing in the result.
# Stops if the variable has no observed values or is constant, because the
# rescaling would otherwise divide by zero.
recalib <- function(data, var, low, high) {
  # Accept the bounds in either order.
  nmin <- low
  nmax <- high
  if (low > high) {
    nmax <- low
    nmin <- high
  }

  values <- data[, var]
  observed <- values[!is.na(values)]
  if (length(observed) == 0L) {
    stop("Variable has no non-missing values", call. = FALSE)
  }
  cmin <- min(observed)
  cmax <- max(observed)
  if (cmax == cmin) {
    stop("Constant value detected", call. = FALSE)
  }

  data[, var] <- (nmax - nmin) / (cmax - cmin) * (values - cmin) + nmin
  data
}
10 changes: 10 additions & 0 deletions R/revcode.R
@@ -0,0 +1,10 @@
# Reverse coding.
#
# Arguments:
#   data  the dataset (data frame or matrix)
#   vars  vector of column numbers or column names to reverse code
#
# Returns `data` in which each listed column x is replaced by
# max(x) - x + min(x), so the largest value becomes the smallest and vice
# versa. Missing values are ignored when finding the minimum and maximum and
# stay missing in the result. An empty `vars` returns `data` unchanged.
revcode <- function(data, vars) {
  # seq_along() instead of 1:length(vars): an empty `vars` must not iterate.
  for (i in seq_along(vars)) {
    values <- data[, vars[i]]
    mx <- max(values, na.rm = TRUE)
    mn <- min(values, na.rm = TRUE)
    data[, vars[i]] <- mx - values + mn
  }
  data
}
53 changes: 53 additions & 0 deletions man/dtrans.Rd
@@ -0,0 +1,53 @@
\name{dtrans}
\alias{dtrans}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Data transform
}
\description{
Transforms standardized (z-score) variables in a dataset so that they have a specified mean and standard deviation.
}
\usage{
dtrans(data, m, sd, rnd = F)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{data}{
name of your dataset
}
\item{m}{
indicate a vector of desired means
}
\item{sd}{
indicate a vector of desired standard deviations
}
\item{rnd}{
Indicates if you want to round the numbers (no decimals). T or F.
}
}
\author{
Francis Huang
}

\examples{

sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345)
cor(sdata)
summary(sdata)
#note: data are in z scores

s2<-dtrans(sdata,c(0,100,50),c(1,15,10),rnd=FALSE)
summary(s2)
sd(s2[,2])
sd(s2[,3])
#note: variables X2 and X3 are now rescaled with the appropriate means and standard deviations.
head(s2)

s2<-dtrans(sdata,c(0,100,50),c(1,15,10),rnd=TRUE)
#at times, you may want a dataset to not have decimals. use rnd=T.
head(s2)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ transform }
\keyword{ data }% __ONLY ONE__ keyword per line
48 changes: 48 additions & 0 deletions man/gendata-package.Rd
@@ -0,0 +1,48 @@
\name{gendata-package}
\alias{gendata-package}
\alias{gendata}
\docType{package}
\title{
Generate synthetic datasets
}
\description{
Create synthetic datasets based on a correlation table. Additional functions can be used to rescale, transform, and reverse code variables.
}
\details{
\tabular{ll}{
Package: \tab gendata\cr
Type: \tab Package\cr
Version: \tab 1.0\cr
Date: \tab 2014-05-07\cr
License: \tab GPL-3\cr
}

Additional functions are for modifying the dataset. \cr

genmvnorm:
creates the dataset (generates a multivariate normal dataset). \cr
recalib : for rescaling the dataset \cr
dtrans : for giving a variable a new mean and standard deviation \cr
revcode : for reverse coding a variable
}
\author{
Francis Huang

Maintainer: Francis Huang <flh3@hotmail.com>

}
\references{

Fan, X., Sivo, S., & Keenan, S. (2002). SAS for Monte Carlo studies: A guide for quantitative researchers. SAS Institute.
}

\keyword{ package }
\seealso{
\link[gendata:genmvnorm]{genmvnorm}
\link[gendata:revcode]{revcode}
\link[gendata:dtrans]{dtrans}
\link[gendata:recalib]{recalib}


}

68 changes: 68 additions & 0 deletions man/genmvnorm.Rd
@@ -0,0 +1,68 @@
\name{genmvnorm}
\alias{genmvnorm}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
genmvnorm
}
\description{
Generates a multivariate normal dataset based on a specified correlation matrix.
}
\usage{
genmvnorm(cor, k, n, seed = F)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{cor}{Can be a correlation matrix-- e.g., data<-cor(xyz)-- or the lower half
of a correlation matrix listed column by column, e.g., for a 3 variable dataset, data<-c(.7,.3,.2) gives r(1,2)=.7, r(1,3)=.3, and r(2,3)=.2-- useful for creating datasets without having to specify both halves of the correlation matrix.

}
\item{k}{
Indicate the number of variables in your dataset.
}
\item{n}{
Indicate the number of observations in your new synthetic dataset.
}
\item{seed}{
For reproducibility of results, set a specific seed number.
}
}
\details{
For creating synthetic datasets. Based on the SAS chapter by Fan et al. (2002).
}

\references{Based on:

Fan, X., Sivo, S., & Keenan, S. (2002). SAS for Monte Carlo studies: A guide for quantitative researchers. SAS Institute.
}
\author{
Francis Huang
}


\seealso{
\link[gendata:revcode]{revcode}
\link[gendata:dtrans]{dtrans}
\link[gendata:recalib]{recalib}

}
\examples{
sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345)
cor(sdata)
#dataset above uses the lower half of a correlation table
# 1 .7 .2
# .7 1 .3
# .2 .3 1
# Can also use a correlation table

data(iris)
dat<-cor(iris[,1:3])
dat
sdata<-genmvnorm(cor=dat,k=3,n=100,seed=123)
cor(sdata)

#example above uses the IRIS dataset.
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ montecarlo }
\keyword{ synthetic }% __ONLY ONE__ keyword per line
58 changes: 58 additions & 0 deletions man/recalib.Rd
@@ -0,0 +1,58 @@
\name{recalib}
\alias{recalib}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Recalibrate (rescale) variables
}
\description{
Rescale variables (one at a time) to have a new minimum and maximum value.
}
\usage{
recalib(data, var, low, high)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{data}{the dataset to use.
}
\item{var}{indicate the variable number (or variable name).}
\item{low}{
Indicate the new minimum value.
}
\item{high}{
Indicate the new maximum value.
}
}
\details{
Specify the rescaling of variables one at a time.
}


\author{
Francis Huang
}


\seealso{

\link[gendata:genmvnorm]{genmvnorm}
\link[gendata:revcode]{revcode}
\link[gendata:dtrans]{dtrans}
}
\examples{
sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345)
cor(sdata)
summary(sdata[,1])
#note the min and max of variable X1
#changes variable one to have a minimum of 10 and a maximum of 50
#correlations remain the same

s2<-recalib(sdata,1,10,50)
cor(s2)
summary(s2[,1])
#note revised values of variable X1

}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{ manip }
\keyword{ utilities }% __ONLY ONE__ keyword per line
35 changes: 35 additions & 0 deletions man/revcode.Rd
@@ -0,0 +1,35 @@
\name{revcode}
\alias{revcode}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Reverse coding variables
}
\description{
Reverse codes variables
}
\usage{
revcode(data, vars)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{data}{
indicates your dataset.
}
\item{vars}{
indicates the variable number or name to reverse code.
}
}

\author{
Francis Huang
}


\seealso{
\link[gendata:genmvnorm]{genmvnorm}
\link[gendata:dtrans]{dtrans}
\link[gendata:recalib]{recalib}
}

\keyword{ reverse }
\keyword{ recode }% __ONLY ONE__ keyword per line

0 comments on commit cb24136

Please sign in to comment.