Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
vmoprojs authored and cran-robot committed Jul 18, 2023
0 parents commit 951cc7d
Show file tree
Hide file tree
Showing 10 changed files with 261 additions and 0 deletions.
22 changes: 22 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Package: MTest
Type: Package
Title: A Procedure for Multicollinearity Testing using Bootstrap
Version: 1.0.0
Date: 2023-07-15
Authors@R: c(person("Víctor", "Morales-Oñate",role=c("aut","cre"),email="victor.morales@uv.cl",comment = c(ORCID = "0000-0003-1922-6571")),person("Bolívar", "Morales-Oñate", role = "aut",email = "bmoralesonate@gmail.com",comment = c(ORCID = "0000-0003-4980-8759")))
Maintainer: Víctor Morales-Oñate <victor.morales@uv.cl>
Description: Functions for detecting multicollinearity. This test gives statistical support to two of the most famous methods for detecting multicollinearity in applied work: Klein’s rule and Variance Inflation Factor (VIF). See the URL for the papers associated with this package, as for instance, Morales-Oñate and Morales-Oñate (2023) <doi:10.33333/rp.vol51n2.05>.
Depends: R (>= 4.0.5)
License: GPL (>= 3)
Encoding: UTF-8
Imports: car
Repository: CRAN
URL: https://github.com/vmoprojs/MTest
BugReports: https://github.com/vmoprojs/MTest/issues
LazyData: true
NeedsCompilation: no
Packaged: 2023-07-17 14:40:11 UTC; victormorales
Author: Víctor Morales-Oñate [aut, cre]
(<https://orcid.org/0000-0003-1922-6571>),
Bolívar Morales-Oñate [aut] (<https://orcid.org/0000-0003-4980-8759>)
Date/Publication: 2023-07-18 09:20:08 UTC
9 changes: 9 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
e4a713766e6a31ba35c6a28213b47224 *DESCRIPTION
2618f06dafa7bc8a5b803832aac1dd0b *NAMESPACE
df74c3f377089e152b54b55b9920d9aa *R/MTest.R
7faf740febfe1c9cdb8ae89e8c3402fc *R/pairwiseKStest.R
439bf689fa27cf9affd0335332142165 *build/partial.rdb
89bc2ba09e07d8ada709c04e793c101c *data/simDataMTest.RData
e25f6fe5e25a17f31231f482c7de5fcd *man/MTest.Rd
57fdf0c7ce44ef6ef103af0eb0c05e44 *man/pairwiseKStest.Rd
30dfd752af61eaecfff2cffbdc217306 *man/simDataMTest.Rd
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
exportPattern("^[[:alpha:]]+")
importFrom("stats", "formula", "ks.test", "lm", "terms")
72 changes: 72 additions & 0 deletions R/MTest.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
####################################################
### File name: MTest.r
####################################################
# Bootstrap test for multicollinearity (VIF test and Klein's rule).
#
# Refits the model stored in `object` on `nboot` resamples (with replacement)
# of its model frame. For every resample it records the global R squared of
# the refit and, for every predictor, the quantity (v - 1) / v where v is the
# value returned by car::vif(); for an ordinary VIF this equals the R squared
# of the predictor's auxiliary regression.
#
# Arguments:
#   object     fitted model that stores its model frame in `object$model` and
#              supports formula() and terms() (e.g. an "lm" fit).
#   nboot      number of bootstrap iterations; must be >= 1. A fractional
#              value is truncated to the number of iterations actually run.
#   nsam       size of each resample; NULL (default) uses the number of rows
#              of the model frame.
#   trace      if TRUE, print one progress line per iteration.
#   seed       if not NULL, passed to set.seed() before resampling.
#   valor_vif  threshold the auxiliary values are compared against in the
#              VIF test.
#
# Value: a list with
#   Bvals       nboot x (p + 1) matrix; column "global" is the global
#               R squared, the other columns are the per-predictor values.
#   pval_vif    per predictor, proportion of resamples whose auxiliary value
#               exceeds `valor_vif`.
#   pval_klein  per predictor, proportion of resamples whose auxiliary value
#               exceeds the global R squared.
#
# Side effects: sets the RNG seed when `seed` is given and prints the elapsed
# time of the bootstrap loop.
MTest <- function(object, nboot = 100,
                  nsam = NULL, trace = FALSE, seed = NULL,
                  valor_vif = 0.9) {
  if (!is.numeric(nboot) || length(nboot) != 1L || is.na(nboot) || nboot < 1) {
    stop("'nboot' must be a single number greater than or equal to 1",
         call. = FALSE)
  }
  # The loop runs a whole number of iterations, so the proportions below must
  # be divided by that same whole number.
  nboot <- floor(nboot)

  datos <- object$model
  if (is.null(datos)) {
    stop("'object' must store its model frame in 'object$model'",
         call. = FALSE)
  }
  ff <- formula(object)
  n_obs <- nrow(datos)
  if (is.null(nsam)) {
    nsam <- n_obs
  }
  row_ids <- seq_len(n_obs)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Loop-invariant: does the model contain interaction terms (order > 1)?
  has_interactions <- any(attr(terms(object), "order") > 1)

  # One numeric vector per iteration; bound into a matrix once at the end
  # instead of growing a matrix with rbind() inside the loop.
  boot_rows <- vector("list", nboot)

  tt <- proc.time()
  for (i in seq_len(nboot)) {
    sam <- sample(row_ids, nsam, replace = TRUE)
    maux <- lm(ff, data = datos[sam, , drop = FALSE])

    if (has_interactions) {
      vif_vals <- suppressMessages(car::vif(maux, type = "predictor"))
      # NOTE(review): the third column is presumably GVIF^(1/(2*Df)), which
      # is not the plain VIF -- confirm that (v - 1) / v is the intended
      # statistic for models with interactions.
      vif_vals <- vif_vals[, 3]
    } else {
      vif_vals <- car::vif(maux, type = "terms")
    }
    if (!is.null(dim(vif_vals))) {
      # car::vif() returned a table instead of one value per predictor; the
      # flattened table could not be labelled afterwards, so fail early with
      # an explicit message.
      stop("predictors with more than one degree of freedom ",
           "(e.g. factors) are not supported", call. = FALSE)
    }

    boot_rows[[i]] <- c(summary(maux)$r.squared, (vif_vals - 1) / vif_vals)

    if (trace) {
      cat("Iteration", i, "out of ", nboot, "\n")
    }
  }
  tt <- proc.time() - tt
  print(tt)

  sol_rsq <- do.call(rbind, boot_rows)
  aux_rsq <- sol_rsq[, -1, drop = FALSE]
  n_pred <- ncol(aux_rsq)

  # VIF test: how often does each auxiliary value exceed the threshold?
  pval_vif <- colSums(aux_rsq > valor_vif) / nboot
  # Klein's rule: how often does each auxiliary value exceed the global
  # R squared? The global column is recycled down every predictor column.
  pval_klein <- colSums(sol_rsq[, 1] < aux_rsq) / nboot

  # Label predictors after the model-frame columns (the first column is the
  # response). If the model frame carries extra columns, fall back to the
  # names that came with the VIF values.
  pred_names <- names(datos)[-1]
  if (length(pred_names) != n_pred) {
    pred_names <- colnames(aux_rsq)
    if (is.null(pred_names)) {
      pred_names <- paste0("V", seq_len(n_pred))
    }
  }
  names(pval_vif) <- pred_names
  names(pval_klein) <- pred_names

  colnames(sol_rsq) <- c("global", pred_names)
  rownames(sol_rsq) <- seq_len(nrow(sol_rsq))

  list(Bvals = sol_rsq, pval_vif = pval_vif, pval_klein = pval_klein)
}
23 changes: 23 additions & 0 deletions R/pairwiseKStest.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
####################################################
### File name: pairwiseKStest.r
####################################################
# Pairwise two-sample Kolmogorov-Smirnov tests between the columns of X.
#
# Arguments:
#   X            matrix or data frame whose columns are compared pairwise.
#   alternative  alternative hypothesis passed to ks.test(): "two.sided",
#                "less" or "greater" (default). Unambiguous abbreviations
#                are accepted.
#
# Value: a list with
#   KSpwMatrix  ncol(X) x ncol(X) matrix; entry [i, j] is the p value of
#               ks.test(X[, i], X[, j]). Rows and columns are named after
#               the columns of X.
#   mes         character string describing the alternative hypothesis.
pairwiseKStest <- function(X, alternative = "greater") {
  # Resolve the value once so that the message selected below always matches
  # what ks.test() receives; an abbreviation would otherwise be accepted by
  # ks.test() but leave the message undefined.
  alternative <- match.arg(alternative, c("two.sided", "less", "greater"))

  if (length(dim(X)) != 2L) {
    stop("'X' must be a matrix or a data frame", call. = FALSE)
  }

  n <- ncol(X)
  sol <- matrix(NA_real_, nrow = n, ncol = n)
  for (i in seq_len(n)) {
    for (j in seq_len(n)) {
      # Warnings raised by ks.test() are deliberately silenced.
      a <- suppressWarnings(ks.test(X[, i], X[, j], alternative = alternative))
      sol[i, j] <- a$p.value
    }
  }

  mes <- switch(alternative,
    less = "alternative hypothesis: the CDF of x lies below that of y. Rows are `x` and Columns are `y`",
    greater = "alternative hypothesis: the CDF of x lies above that of y. Rows are `x` and Columns are `y`",
    two.sided = "alternative hypothesis: two-sided"
  )

  colnames(sol) <- colnames(X)
  rownames(sol) <- colnames(X)
  list(KSpwMatrix = sol, mes = mes)
}
Binary file added build/partial.rdb
Binary file not shown.
Binary file added data/simDataMTest.RData
Binary file not shown.
74 changes: 74 additions & 0 deletions man/MTest.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
\name{MTest}
\alias{MTest}
\title{MTest}
\usage{
MTest(object, nboot = 100,
nsam = NULL,trace = FALSE,seed = NULL,
valor_vif = 0.9)
}
\description{

MTest is a nonparametric test based on bootstrap for detecting multicollinearity. This test gives statistical support to two of the most famous methods for detecting multicollinearity in applied work: Klein's rule and Variance Inflation Factor (VIF for essential multicollinearity).
}


\arguments{
\item{object}{an object representing a model of an appropriate class (mainly "lm"). This is used as the model in MTest.}
\item{nboot}{Numeric; number of bootstrap iterations to obtain the probability distribution of R squared (global and auxiliary).}
\item{nsam}{Numeric; sample size for bootstrap samples.}
\item{trace}{Logical; prints iteration process.}
\item{seed}{Numeric; seed value for the bootstrap in nboot parameter.}
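\item{valor_vif}{Numeric; threshold for the auxiliary R squared in the VIF test: \code{pval_vif} is the proportion of bootstrap samples in which the auxiliary R squared exceeds this value.}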
}

\details{
MTest generates a bootstrap distribution for the coefficient of determination which lets the researcher assess multicollinearity by setting a statistical significance \eqn{\alpha}, or more precisely, an achieved significance level (ASL) for a given threshold.


Consider the regression model

\eqn{
Y_i = \beta_0X_{0i} + \beta_1X_{1i} + \cdots+ \beta_pX_{pi} +u_i
}

where \eqn{i = 1,...,n}, \eqn{X_{ji}} are the predictors with \eqn{j = 1,...,p}, \eqn{X_{0i} = 1} for all \eqn{i} and \eqn{u_i} is the Gaussian error term.

In order to describe Klein's rule and VIF methods, we need to define the \emph{auxiliary regressions} associated with the model. An example of an auxiliary regression is:
\eqn{
X_{2i} = \gamma_1X_{1i} + \gamma_3X_{3i} + \cdots+ \gamma_pX_{pi} +u_i.
}
In general, there are \eqn{p} auxiliary regressions and the dependent variable is omitted in each auxiliary regression. Let \eqn{R_{g}^{2}} be the coefficient of determination of the model and \eqn{R_{j}^{2}} the \eqn{j\text{th}} coefficient of determination of the \eqn{j\text{th}} auxiliary regression.
}
\value{
Returns a list containing the following components:
\item{pval_vif}{p values for vif test;}
\item{pval_klein}{p values for klein test;}
\item{Bvals}{A \eqn{nboot \times (p+1)} matrix where rows are the bootstrap samples and the columns are \eqn{R_{g_{boot}}^{2}} and \eqn{R_{j_{boot}}^{2}}, which are estimates of \eqn{R_{g}^{2}} and \eqn{R_{j}^{2}}, see Section \bold{Details}.}
}
\author{Víctor Morales Oñate, \email{victor.morales@uv.cl}, \url{https://sites.google.com/site/moralesonatevictor/},\url{https://www.linkedin.com/in/vmoralesonate/}
Bolívar Morales Oñate, \email{bmoralesonate@gmail.com}, \url{https://sites.google.com/site/moralesonatevictor/}
}
\references{
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05}
}
\examples{
library(MTest)
data(simDataMTest)
m1 <- lm(y~.,data = simDataMTest)
boot.sol <- MTest(m1,trace=FALSE,seed = 1,nboot = 50)
boot.sol$pval_vif
boot.sol$pval_klein
head(boot.sol$Bvals)
}
\keyword{Multicollinearity}
46 changes: 46 additions & 0 deletions man/pairwiseKStest.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
\name{pairwiseKStest}
\alias{pairwiseKStest}
\title{pairwiseKStest}
\usage{
pairwiseKStest(X,alternative="greater")
}
\description{
Returns the matrix of \eqn{p}-values of the Kolmogorov-Smirnov test applied to every pair of columns of \code{X}.
}


\arguments{
\item{X}{Numeric; a matrix (\code{Bvals} output from \code{MTest} function) whose columns are to be compared.}
\item{alternative}{String; the alternative hypothesis, one of \code{"two.sided"}, \code{"less"} or \code{"greater"} (the default). See \code{\link[stats]{ks.test}} for the meanings of the possible values.}
}

\details{Performs a pairwise Kolmogorov-Smirnov (KS) test on the columns of a given matrix \code{X}. In particular, if \code{X} is the \code{Bvals} output from \code{MTest} function, \code{pairwiseKStest} establishes a guide for an educated removal of variables that are causing multicollinearity.

Note that the matrix \eqn{B_{n_{boot}\times (p+1)}} (which is \code{Bvals} output from \code{MTest} function) allows us to inspect results in detail and make further analyses such as boxplots, pairwise Kolmogorov-Smirnov (KS) tests of the predictors and so on.
}


\value{
Returns a list containing the following components:
\item{KSpwMatrix}{\eqn{p}-values matrix of pairwise KS testing;}
\item{mes}{Character; indicates the alternative hypothesis.}
}

\author{Víctor Morales Oñate, \email{victor.morales@uv.cl}, \url{https://sites.google.com/site/moralesonatevictor/},\url{https://www.linkedin.com/in/vmoralesonate/}
Bolívar Morales Oñate, \email{bmoralesonate@gmail.com}, \url{https://sites.google.com/site/moralesonatevictor/}
}

\references{
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05}
}

\examples{
library(MTest)
data(simDataMTest)
pairwiseKStest(X=simDataMTest)
}


\keyword{KS}
13 changes: 13 additions & 0 deletions man/simDataMTest.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
\name{simDataMTest}
\docType{data}
\alias{simDataMTest}
\title{Simulated data for MTest}
\description{
This data set helps to test the functions in the MTest package; the generating process is documented in the reference.
}
\usage{simDataMTest}
\format{A dataframe containing 10000 observations and four columns.}
\references{
Morales-Oñate, V., and Morales-Oñate, B. (2023). \emph{MTest: a Bootstrap Test for Multicollinearity}. Revista Politécnica, 51(2), 53–62. \doi{10.33333/rp.vol51n2.05}
}
\keyword{datasets}

0 comments on commit 951cc7d

Please sign in to comment.