-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 951cc7d
Showing
10 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Package: MTest | ||
Type: Package | ||
Title: A Procedure for Multicollinearity Testing using Bootstrap | ||
Version: 1.0.0 | ||
Date: 2023-07-15 | ||
Authors@R: c(person("Víctor", "Morales-Oñate",role=c("aut","cre"),email="victor.morales@uv.cl",comment = c(ORCID = "0000-0003-1922-6571")),person("Bolívar", "Morales-Oñate", role = "aut",email = "bmoralesonate@gmail.com",comment = c(ORCID = "0000-0003-4980-8759"))) | ||
Maintainer: Víctor Morales-Oñate <victor.morales@uv.cl> | ||
Description: Functions for detecting multicollinearity. This test gives statistical support to two of the most famous methods for detecting multicollinearity in applied work: Klein’s rule and Variance Inflation Factor (VIF). See the URL for the papers associated with this package, as for instance, Morales-Oñate and Morales-Oñate (2023) <doi:10.33333/rp.vol51n2.05>. | ||
Depends: R (>= 4.0.5) | ||
License: GPL (>= 3) | ||
Encoding: UTF-8 | ||
Imports: car | ||
Repository: CRAN | ||
URL: https://github.com/vmoprojs/MTest | ||
BugReports: https://github.com/vmoprojs/MTest/issues | ||
LazyData: true | ||
NeedsCompilation: no | ||
Packaged: 2023-07-17 14:40:11 UTC; victormorales | ||
Author: Víctor Morales-Oñate [aut, cre] | ||
(<https://orcid.org/0000-0003-1922-6571>), | ||
Bolívar Morales-Oñate [aut] (<https://orcid.org/0000-0003-4980-8759>) | ||
Date/Publication: 2023-07-18 09:20:08 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
e4a713766e6a31ba35c6a28213b47224 *DESCRIPTION | ||
2618f06dafa7bc8a5b803832aac1dd0b *NAMESPACE | ||
df74c3f377089e152b54b55b9920d9aa *R/MTest.R | ||
7faf740febfe1c9cdb8ae89e8c3402fc *R/pairwiseKStest.R | ||
439bf689fa27cf9affd0335332142165 *build/partial.rdb | ||
89bc2ba09e07d8ada709c04e793c101c *data/simDataMTest.RData | ||
e25f6fe5e25a17f31231f482c7de5fcd *man/MTest.Rd | ||
57fdf0c7ce44ef6ef103af0eb0c05e44 *man/pairwiseKStest.Rd | ||
30dfd752af61eaecfff2cffbdc217306 *man/simDataMTest.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom("stats", "formula", "ks.test", "lm", "terms") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#################################################### | ||
### File name: MTest.r | ||
#################################################### | ||
# Bootstrap test for multicollinearity.
#
# Refits the model in `object` on `nboot` bootstrap resamples of its stored
# model frame and records, for every resample, the global R squared together
# with (VIF - 1) / VIF for each VIF value reported by car::vif().
#
# Arguments:
#   object    fitted model that stores its model frame in `object$model`
#             (e.g. an "lm" fit).
#   nboot     number of bootstrap replicates (at least 1; fractions are
#             truncated).
#   nsam      size of each bootstrap sample; defaults to the number of rows of
#             the model frame.
#   trace     if TRUE, prints a progress line per replicate.
#   seed      optional seed passed to set.seed() before resampling.
#   valor_vif threshold compared against the auxiliary values (VIF - 1) / VIF.
#
# Returns a list with:
#   Bvals      nboot x (p + 1) matrix; first column "global" is the model
#              R squared, the remaining columns are the auxiliary values.
#   pval_vif   per column, the share of replicates whose auxiliary value
#              exceeds `valor_vif`.
#   pval_klein per column, the share of replicates in which the global
#              R squared is smaller than the auxiliary value.
#
# Side effects: prints the elapsed time of the bootstrap loop and, when `seed`
# is given, resets the global RNG state.
MTest <- function(object, nboot = 100,
                  nsam = NULL, trace = FALSE, seed = NULL,
                  valor_vif = 0.9)
{
  if (!is.numeric(nboot) || length(nboot) != 1L || is.na(nboot) || nboot < 1) {
    stop("`nboot` must be a single number greater than or equal to 1.",
         call. = FALSE)
  }
  # Only whole replicates can be run; the proportions below are taken over
  # the replicates that were actually executed.
  nboot <- floor(nboot)

  datos <- object$model
  if (is.null(datos)) {
    stop("`object` does not store its model frame in `object$model`.",
         call. = FALSE)
  }
  ff <- formula(object)
  if (is.null(nsam)) {
    nsam <- nrow(datos)
  }
  vals <- seq_len(nrow(datos))

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # The model structure is the same in every replicate, so decide once which
  # flavour of car::vif() is needed.
  has_interactions <- any(attr(terms(object), "order") > 1)

  # One numeric vector per replicate; bound into a matrix after the loop
  # instead of growing a matrix with rbind() on every iteration.
  boot_rows <- vector("list", nboot)

  tt <- proc.time()
  for (i in seq_len(nboot)) {
    sam <- sample(vals, nsam, replace = TRUE)
    aux <- datos[sam, ]
    maux <- lm(ff, data = aux)
    sm <- summary(maux)
    if (has_interactions) {
      vif_tab <- suppressMessages(car::vif(maux, type = "predictor"))
      # Third column of the per-predictor table, as in the original code.
      # Extracting a column drops the row labels, so restore them to keep
      # the results named.
      vif.vals <- vif_tab[, 3]
      names(vif.vals) <- rownames(vif_tab)
    } else {
      vif.vals <- car::vif(maux, type = "terms")
    }

    boot_rows[[i]] <- c(sm$r.squared, (vif.vals - 1) / vif.vals)

    if (trace) {
      cat("Iteration", i, "out of ", nboot, "\n")
    }
  }
  tt <- proc.time() - tt
  print(tt)

  sol.rsq <- do.call(rbind, boot_rows)

  # Label the columns before deriving the p-values so that both p-value
  # vectors inherit the same names as `Bvals`. The model-frame names are used
  # when they line up with the auxiliary columns; otherwise (e.g. the frame
  # carries a "(weights)" column) fall back to the names reported by
  # car::vif().
  pred_names <- names(datos)[-1]
  if (length(pred_names) != ncol(sol.rsq) - 1L) {
    pred_names <- colnames(sol.rsq)[-1]
  }
  if (!is.null(pred_names)) {
    colnames(sol.rsq) <- c("global", pred_names)
  }
  rownames(sol.rsq) <- seq_len(nrow(sol.rsq))

  aux_rsq <- sol.rsq[, -1, drop = FALSE]
  pval_vif <- colSums(aux_rsq > valor_vif) / nboot
  # sol.rsq[, 1] has one entry per row and is recycled down each column.
  pval_klein <- colSums(sol.rsq[, 1] < aux_rsq) / nboot

  return(list(Bvals = sol.rsq, pval_vif = pval_vif, pval_klein = pval_klein))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#################################################### | ||
### File name: pairwiseKStest.r | ||
#################################################### | ||
# Pairwise Kolmogorov-Smirnov tests between the columns of X.
#
# Arguments:
#   X           matrix or data frame; ks.test() is run on every ordered pair
#               of its columns (including each column against itself).
#   alternative "two.sided", "less" or "greater" (may be abbreviated); passed
#               on to ks.test().
#
# Returns a list with:
#   KSpwMatrix  square matrix of p-values; entry [i, j] comes from
#               ks.test(X[, i], X[, j]), labelled with the column names of X.
#   mes         character string describing the alternative hypothesis.
pairwiseKStest <- function(X, alternative = "greater")
{
  # Resolve abbreviations up front: ks.test() accepts e.g. "g", and the
  # message lookup below needs the full name.
  alternative <- match.arg(alternative, c("two.sided", "less", "greater"))

  if (length(dim(X)) != 2L) {
    stop("`X` must be a matrix or data frame.", call. = FALSE)
  }

  n <- ncol(X)
  sol <- matrix(NA_real_, nrow = n, ncol = n)
  for (i in seq_len(n)) {
    for (j in seq_len(n)) {
      # Warnings from ks.test() are silenced, as in the original code.
      a <- suppressWarnings(ks.test(X[, i], X[, j], alternative = alternative))
      sol[i, j] <- a$p.value
    }
  }

  mes <- switch(
    alternative,
    less = "alternative hypothesis: the CDF of x lies below that of y. Rows are `x` and Columns are `y`",
    greater = "alternative hypothesis: the CDF of x lies above that of y. Rows are `x` and Columns are `y`",
    two.sided = "alternative hypothesis: two-sided"
  )

  colnames(sol) <- colnames(X)
  rownames(sol) <- colnames(X)
  return(list(KSpwMatrix = sol, mes = mes))
}
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
\name{MTest} | ||
\alias{MTest} | ||
\title{MTest} | ||
\usage{ | ||
MTest(object, nboot = 100, | ||
nsam = NULL,trace = FALSE,seed = NULL, | ||
valor_vif = 0.9) | ||
} | ||
\description{ | ||
|
||
MTest is a nonparametric test based on bootstrap for detecting multicollinearity. This test gives statistical support to two of the most famous methods for detecting multicollinearity in applied work: Klein’s rule and Variance Inflation Factor (VIF for essential multicollinearity). | ||
} | ||
|
||
|
||
\arguments{ | ||
\item{object}{an object representing a model of an appropriate class (mainly "lm"). This is used as the model in MTest.} | ||
\item{nboot}{Numeric; number of bootstrap iterations to obtain the probability distribution of R squared (global and auxiliary).} | ||
\item{nsam}{Numeric; sample size for bootstrap samples.} | ||
\item{trace}{Logical; prints iteration process.} | ||
\item{seed}{Numeric; seed value for the bootstrap in nboot parameter.} | ||
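\item{valor_vif}{Numeric; threshold used in the VIF test. \code{pval_vif} is the proportion of bootstrap replicates in which the auxiliary value \eqn{(VIF - 1)/VIF} exceeds this threshold.} | ||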
} | ||
|
||
\details{ | ||
MTest generates a bootstrap distribution for the coefficient of determination which lets the researcher assess multicollinearity by setting a statistical significance \eqn{\alpha}, or more precisely, an achieved significance level (ASL) for a given threshold. | ||
|
||
|
||
Consider the regression model | ||
|
||
\eqn{ | ||
Y_i = \beta_0X_{0i} + \beta_1X_{1i} + \cdots+ \beta_pX_{pi} +u_i | ||
} | ||
|
||
where \eqn{i = 1,...,n}, \eqn{X_{j,i}} are the predictors with \eqn{j = 1,...,p}, \eqn{X_0 = 1} for all \eqn{i} and \eqn{u_i} is the gaussian error term. | ||
|
||
In order to describe Klein's rule and VIF methods, we need to define the \emph{auxiliary regressions} associated with the model. An example of an auxiliary regression is: | ||
\eqn{ | ||
X_{2i} = \gamma_1X_{1i} + \gamma_3X_{3i} + \cdots+ \gamma_pX_{pi} +u_i. | ||
} | ||
In general, there are \eqn{p} auxiliary regressions and the dependent variable is omitted in each auxiliary regression. Let \eqn{R_{g}^{2}} be the coefficient of determination of the model and \eqn{R_{j}^{2}} the \eqn{j\text{th}} coefficient of determination of the \eqn{j\text{th}} auxiliary regression. | ||
} | ||
\value{ | ||
Returns a list (no class attribute is set) containing the following components: | ||
\item{pval_vif}{p values for vif test;} | ||
\item{pval_klein}{p values for klein test;} | ||
\item{Bvals}{A \eqn{nboot \times (p+1)} matrix where rows are the bootstrap samples and the columns are \eqn{R_{g_{boot}}^{2}} and \eqn{R_{j_{boot}}^{2}}, which are estimates of \eqn{R_{g}^{2}} and \eqn{R_{j}^{2}}; see Section \bold{Details}.} | ||
} | ||
\author{Víctor Morales Oñate, \email{victor.morales@uv.cl}, \url{https://sites.google.com/site/moralesonatevictor/},\url{https://www.linkedin.com/in/vmoralesonate/} | ||
Bolívar Morales Oñate, \email{bmoralesonate@gmail.com}, \url{https://sites.google.com/site/moralesonatevictor/} | ||
} | ||
\references{ | ||
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05} | ||
} | ||
\examples{ | ||
library(MTest) | ||
data(simDataMTest) | ||
m1 <- lm(y~.,data = simDataMTest) | ||
boot.sol <- MTest(m1,trace=FALSE,seed = 1,nboot = 50) | ||
boot.sol$pval_vif | ||
boot.sol$pval_klein | ||
head(boot.sol$Bvals) | ||
} | ||
\keyword{Multicollinearity} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
\name{pairwiseKStest} | ||
\alias{pairwiseKStest} | ||
\title{pairwiseKStest} | ||
\usage{ | ||
pairwiseKStest(X,alternative="greater") | ||
} | ||
\description{ | ||
Returns the matrix of \eqn{p}-values of the pairwise Kolmogorov-Smirnov tests between the columns of \code{X}. | ||
} | ||
|
||
|
||
\arguments{ | ||
\item{X}{Numeric; a matrix (\code{Bvals} output from \code{MTest} function) whose columns are to be compared.} | ||
\item{alternative}{String; the alternative hypothesis, one of \code{"two.sided"}, \code{"less"} or \code{"greater"} (the default). See ‘ks.test’ for the meanings of the possible values.} | ||
} | ||
|
||
\details{Performs a Kolmogorov-Smirnov (KS) test for every pair of columns of a given matrix \code{X}. In particular, if \code{X} is the \code{Bvals} output from the \code{MTest} function, \code{pairwiseKStest} provides a guide for an educated removal of the variables that are causing multicollinearity. | ||
|
||
Note that the matrix \eqn{B_{n_{boot}\times (p+1)}} (which is the \code{Bvals} output from the \code{MTest} function) allows us to inspect results in detail and carry out further analyses such as boxplots, pairwise Kolmogorov-Smirnov (KS) tests of the predictors and so on. | ||
} | ||
|
||
|
||
\value{ | ||
Returns a list (no class attribute is set) containing the following components: | ||
\item{KSpwMatrix}{\eqn{p}-values matrix of pairwise KS testing;} | ||
\item{mes}{Character; indicates the alternative hypothesis.} | ||
} | ||
|
||
\author{Víctor Morales Oñate, \email{victor.morales@uv.cl}, \url{https://sites.google.com/site/moralesonatevictor/},\url{https://www.linkedin.com/in/vmoralesonate/} | ||
Bolívar Morales Oñate, \email{bmoralesonate@gmail.com}, \url{https://sites.google.com/site/moralesonatevictor/} | ||
} | ||
|
||
\references{ | ||
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05} | ||
} | ||
|
||
\examples{ | ||
library(MTest) | ||
data(simDataMTest) | ||
pairwiseKStest(X=simDataMTest) | ||
} | ||
|
||
|
||
\keyword{KS} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
\name{simDataMTest} | ||
\docType{data} | ||
\alias{simDataMTest} | ||
\title{Simulated data for MTest} | ||
\description{ | ||
This data set helps testing functions in MTest package, the generating process is documented in the reference. | ||
} | ||
\usage{simDataMTest} | ||
\format{A dataframe containing 10000 observations and four columns.} | ||
\references{ | ||
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05} | ||
} | ||
\keyword{datasets} |