-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cb24136
Showing
12 changed files
with
367 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Package: gendata | ||
Type: Package | ||
Title: Generate and modify synthetic datasets | ||
Version: 1.0 | ||
Date: 2014-05-07 | ||
Author: Francis Huang <flh3@hotmail.com> | ||
Maintainer: Francis Huang <flh3@hotmail.com> | ||
Description: Set of functions to create datasets using a correlation matrix. | ||
License: GPL-3 | ||
Suggests: psych | ||
Packaged: 2014-05-08 11:52:56 UTC; huangf | ||
NeedsCompilation: no | ||
Repository: CRAN | ||
Date/Publication: 2014-05-08 15:00:04 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
7195afc887c32881466e57af6b4ed44d *DESCRIPTION | ||
df390c53434517b304ac5db487184641 *NAMESPACE | ||
342f1ce5c35aeb3cf31f40751e358665 *R/dtrans.R | ||
f4f8cedfa2020bf348b967e67aba1b87 *R/genmvnorm.R | ||
01ef3ab6e2a122c3c91d6870bc17a2e6 *R/recalib.R | ||
4cbd5ff37108a8c4cd99a19946511f89 *R/revcode.R | ||
67874b9ff91c1fd314102a5dc00cf566 *man/dtrans.Rd | ||
7ad81a8b5b71270c0033a82d41998a13 *man/gendata-package.Rd | ||
63f2bee019b17a527c52d9a5d84d6862 *man/genmvnorm.Rd | ||
0e7981599f1f44e390fb28f645fde363 *man/recalib.Rd | ||
72fe602011db9615eb043cb9a50193b6 *man/revcode.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
exportPattern("^[[:alpha:]]+") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Transform each variable of a dataset to a specified mean and SD.
#
# Column i is replaced by m[i] + sd[i] * column, so the requested means and
# standard deviations are reached when the input columns are z-scores
# (mean 0, SD 1).
#
# Arguments:
#   data  dataset with one column per variable
#   m     vector of desired means, one per column
#   sd    vector of desired standard deviations, one per column
#   rnd   if TRUE, round the result to whole numbers
# Returns the transformed dataset. Prints the number of variables, means and
# SDs as a side effect.
# Stops if the number of means/SDs does not match the number of columns, or
# if any column holds a single constant value.
dtrans <- function(data, m, sd, rnd = FALSE) {
  # basic checking
  x <- dim(data)[2] # number of variables
  cat("Number of variables in dataset:", x, "\n")
  cat("Number of means specified:", length(m), "\n")
  cat("Number of standard deviations specified:", length(sd), "\n")
  if (length(m) != length(sd)) {
    stop("Number of means/SDs should match.")
  }
  if (length(m) != x) {
    stop("Incorrect number of means/SDs")
  }
  # Every column is inspected; missing values are ignored for this check.
  for (i in seq_len(x)) {
    xrange <- range(data[, i], na.rm = TRUE)
    if (isTRUE(xrange[1] == xrange[2])) {
      stop("Constant value detected")
    }
  }

  # transforming
  for (i in seq_len(x)) {
    data[, i] <- m[i] + sd[i] * data[, i]
  }
  if (rnd == TRUE) {
    round(data, 0)
  } else {
    data
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
|
||
#################### | ||
# Generate a multivariate normal dataset from a correlation structure.
#
# Arguments:
#   cor   either a correlation matrix, or a vector with the k * (k - 1) / 2
#         entries of its lower triangle (filled column by column)
#   k     number of variables
#   n     number of observations to generate
#   seed  value passed to set.seed() for reproducible results; FALSE (the
#         default) leaves the random number generator untouched
# Returns a data frame with n rows and k columns.
# Stops if a vector `cor` has the wrong length, or if the suggested 'psych'
# package (used for principal()) is not installed.
genmvnorm <- function(cor, k, n, seed = FALSE) {
  # identical() rather than `!=` so that a seed of 0 is honoured.
  if (!identical(seed, FALSE)) {
    set.seed(seed)
  }

  # Expand a lower-triangle vector into a full symmetric matrix.
  if (!is.matrix(cor)) {
    if (length(cor) != k * (k - 1) / 2) {
      stop("STOP: wrong correlation table")
    }
    cr.cor <- matrix(NA, k, k)
    diag(cr.cor) <- 1
    cr.cor[lower.tri(cr.cor)] <- cor
    cr.cor[upper.tri(cr.cor)] <- t(cr.cor)[upper.tri(cr.cor)]
    cor <- cr.cor
  }

  # 'psych' is only a suggested package: check for it, never install it here.
  if (!requireNamespace("psych", quietly = TRUE)) {
    stop(
      "Package 'psych' is required by genmvnorm(); ",
      "install it with install.packages(\"psych\")",
      call. = FALSE
    )
  }
  fit <- psych::principal(cor, rotate = "none", nfactors = k)

  # Unrotated component loadings as a plain k x k matrix.
  l <- fit$loadings[seq_len(k), seq_len(k)]
  f <- matrix(l, nrow = k)

  # n x k independent standard normal draws, filled column by column.
  ma <- matrix(rnorm(n * k), nrow = n, ncol = k)

  # Mix the independent draws through the loadings.
  sol <- t(f %*% t(ma))

  data.frame(sol)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Rescale one variable of a dataset to a new minimum and maximum.
#
# Arguments:
#   data  the dataset
#   var   column number or column name of the variable to rescale
#   low   new minimum value
#   high  new maximum value (if low > high the two are swapped)
# Returns the dataset with the chosen variable linearly rescaled.
# Stops if the variable is constant, because its range would be zero and the
# rescaling would divide by zero.
recalib <- function(data, var, low, high) {
  nmin <- low
  nmax <- high
  if (low > high) {
    nmax <- low
    nmin <- high
  }
  cmin <- min(data[, var])
  cmax <- max(data[, var])
  if (isTRUE(cmax == cmin)) {
    stop("Constant value detected")
  }

  data[, var] <- (nmax - nmin) / (cmax - cmin) * (data[, var] - cmin) + nmin
  data
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Reverse code variables.
#
# Each selected variable is replaced by max - value + min, so its smallest
# observed value becomes its largest and vice versa.
#
# Arguments:
#   data  the dataset
#   vars  column numbers or column names of the variables to reverse code
# Returns the dataset with the selected variables reverse coded; an empty
# `vars` returns the dataset unchanged.
revcode <- function(data, vars) {
  # seq_along() so that an empty `vars` performs no iterations.
  for (i in seq_along(vars)) {
    v <- vars[i]
    mx <- max(data[, v])
    mn <- min(data[, v])
    data[, v] <- mx - data[, v] + mn
  }
  data
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
\name{dtrans} | ||
\alias{dtrans} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
Data transform | ||
} | ||
\description{ | ||
Transforms variables in a dataset with a specified mean and standard deviation. | ||
} | ||
\usage{ | ||
dtrans(data, m, sd, rnd = F) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{data}{ | ||
name of your dataset | ||
} | ||
\item{m}{ | ||
indicate a vector of desired means | ||
} | ||
\item{sd}{ | ||
indicate a vector of desired standard deviations | ||
} | ||
\item{rnd}{ | ||
Indicates if you want to round the numbers (no decimals). T or F. | ||
} | ||
} | ||
\author{ | ||
Francis Huang | ||
} | ||
|
||
\examples{ | ||
|
||
sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345) | ||
cor(sdata) | ||
summary(sdata) | ||
#note: data are in z scores | ||
|
||
s2<-dtrans(sdata,c(0,100,50),c(1,15,10),rnd=FALSE) | ||
summary(s2) | ||
sd(s2[,2]) | ||
sd(s2[,3]) | ||
#note: variables X2 and X3 are now rescaled with the appropriate means and standard deviations. | ||
head(s2) | ||
|
||
s2<-dtrans(sdata,c(0,100,50),c(1,15,10),rnd=TRUE) | ||
#at times, you may want a dataset to not have decimals. use rnd=T. | ||
head(s2) | ||
} | ||
% Add one or more standard keywords, see file 'KEYWORDS' in the | ||
% R documentation directory. | ||
\keyword{ transform } | ||
\keyword{ data }% __ONLY ONE__ keyword per line |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
\name{gendata-package} | ||
\alias{gendata-package} | ||
\alias{gendata} | ||
\docType{package} | ||
\title{ | ||
Generate synthetic datasets | ||
} | ||
\description{ | ||
Create synthetic datasets based on a correlation table. Additional functions can be used to rescale, transform, and reverse code variables. | ||
} | ||
\details{ | ||
\tabular{ll}{ | ||
Package: \tab gendata\cr | ||
Type: \tab Package\cr | ||
Version: \tab 1.0\cr | ||
Date: \tab 2014-05-07\cr | ||
License: \tab GPL-3\cr | ||
} | ||
|
||
Additional functions are for modifying the dataset. \cr | ||
|
||
genmvnorm: | ||
creates the dataset (generates a multivariate normal dataset). \cr | ||
recalib : for rescaling the dataset \cr | ||
dtrans : for giving a variable a new mean and standard deviation \cr | ||
revcode : for reverse coding a variable | ||
} | ||
\author{ | ||
Francis Huang | ||
|
||
Maintainer: Francis Huang <flh3@hotmail.com> | ||
|
||
} | ||
\references{ | ||
|
||
Fan, X., Sivo, S., & Keenan, S. (2002). SAS for Monte Carlo studies: A guide for quantitative researchers. SAS Institute. | ||
} | ||
|
||
\keyword{ package } | ||
\seealso{ | ||
\link[gendata:genmvnorm]{genmvnorm} | ||
\link[gendata:revcode]{revcode} | ||
\link[gendata:dtrans]{dtrans} | ||
\link[gendata:recalib]{recalib} | ||
|
||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
\name{genmvnorm} | ||
\alias{genmvnorm} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
genmvnorm | ||
} | ||
\description{ | ||
Generates a multivariate normal dataset based on a specified correlation matrix. | ||
} | ||
\usage{ | ||
genmvnorm(cor, k, n, seed = F) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{cor}{Can be a correlation matrix-- e.g., data<-cor(xyz)-- or the lower half | ||
of a correlation matrix, e.g., for a 3 variable dataset, data<-c(.7,.3,.2)-- useful for creating datasets without having to specify both halves of the correlation matrix. | ||
|
||
} | ||
\item{k}{ | ||
Indicate the number of variables in your dataset. | ||
} | ||
\item{n}{ | ||
Indicate the number of observations in your new synthetic dataset. | ||
} | ||
\item{seed}{ | ||
For reproducibility of results, set a specific seed number. | ||
} | ||
} | ||
\details{ | ||
For creating synthetic datasets. Based on the SAS chapter by Fan et al. (2002). | ||
} | ||
|
||
\references{Based on: | ||
|
||
Fan, X., Sivo, S., & Keenan, S. (2002). SAS for Monte Carlo studies: A guide for quantitative researchers. SAS Institute. | ||
} | ||
\author{ | ||
Francis Huang | ||
} | ||
|
||
|
||
\seealso{ | ||
\link[gendata:revcode]{revcode} | ||
\link[gendata:dtrans]{dtrans} | ||
\link[gendata:recalib]{recalib} | ||
|
||
} | ||
\examples{ | ||
sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345) | ||
cor(sdata) | ||
#dataset above uses the lower half of a correlation table | ||
# 1 .7 .2 | ||
# .7 1 .3 | ||
# .2 .3 1 | ||
# Can also use a correlation table | ||
|
||
data(iris) | ||
dat<-cor(iris[,1:3]) | ||
dat | ||
sdata<-genmvnorm(cor=dat,k=3,n=100,seed=123) | ||
cor(sdata) | ||
|
||
#example above uses the IRIS dataset. | ||
} | ||
% Add one or more standard keywords, see file 'KEYWORDS' in the | ||
% R documentation directory. | ||
\keyword{ montecarlo } | ||
\keyword{ synthetic }% __ONLY ONE__ keyword per line |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
\name{recalib} | ||
\alias{recalib} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
Recalibrate (rescale) variables | ||
} | ||
\description{ | ||
Rescale variables (one at a time) to have a new minimum and maximum value. | ||
} | ||
\usage{ | ||
recalib(data, var, low, high) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{data}{the dataset to use. | ||
} | ||
\item{var}{indicate the variable number (or variable name).} | ||
\item{low}{ | ||
Indicate the new minimum value. | ||
} | ||
\item{high}{ | ||
Indicate the new maximum value. | ||
} | ||
} | ||
\details{ | ||
Specify the rescaling of variables one at a time. | ||
} | ||
|
||
|
||
\author{ | ||
Francis Huang | ||
} | ||
|
||
|
||
\seealso{ | ||
|
||
\link[gendata:genmvnorm]{genmvnorm} | ||
\link[gendata:revcode]{revcode} | ||
\link[gendata:dtrans]{dtrans} | ||
} | ||
\examples{ | ||
sdata<-genmvnorm(cor=c(.7,.2,.3),k=3,n=500,seed=12345) | ||
cor(sdata) | ||
summary(sdata[,1]) | ||
#note the min and max of variable X1 | ||
#changes variable one to have a minimum of 10 and a maximum of 50 | ||
#correlations remain the same | ||
|
||
s2<-recalib(sdata,1,10,50) | ||
cor(s2) | ||
summary(s2[,1]) | ||
#note revised values of variable X1 | ||
|
||
} | ||
% Add one or more standard keywords, see file 'KEYWORDS' in the | ||
% R documentation directory. | ||
\keyword{ manip } | ||
\keyword{ utilities }% __ONLY ONE__ keyword per line |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
\name{revcode} | ||
\alias{revcode} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
Reverse coding variables | ||
} | ||
\description{ | ||
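Reverse codes the specified variables: each value is replaced by (maximum - value + minimum), so the smallest observed value becomes the largest and vice versa. | ||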
} | ||
\usage{ | ||
revcode(data, vars) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{data}{ | ||
indicates your dataset. | ||
} | ||
\item{vars}{ | ||
indicates the variable number or name to reverse code. | ||
} | ||
} | ||
|
||
\author{ | ||
Francis Huang | ||
} | ||
|
||
|
||
\seealso{ | ||
\link[gendata:genmvnorm]{genmvnorm} | ||
\link[gendata:dtrans]{dtrans} | ||
\link[gendata:recalib]{recalib} | ||
} | ||
|
||
\keyword{ reverse } | ||
\keyword{ recode }% __ONLY ONE__ keyword per line |