version 0.1.0

cran · Feb 17, 2023 · 3ce23a8 · 3ce23a8
commit 3ce23a8
Show file tree

Hide file tree

Showing 18 changed files with 1,029 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,36 @@
+Package: practicalSigni
+Type: Package
+Title: Practical Significance Ranking of Regressors
+Version: 0.1.0
+Date: 2023-02-16
+Authors@R: c(person("Hrishikesh", "Vinod", role = c("aut", "cre"),
+      email= "vinod@fordham.edu"))
+Encoding: UTF-8
+Depends: R (>= 4.2.0), np (>= 0.60), xtable (>= 1.8), generalCorr (>=
+        1.2), NNS (>= 0.9), randomForest (>= 4.7)
+Suggests: R.rsp
+VignetteBuilder: R.rsp
+Description: Consider a possibly nonlinear nonparametric regression
+   with p regressors. We provide 13 methods for ranking the
+   regressors by their practical significance or importance,
+   including machine learning tools. The comprehensive
+   methods are as follows.
+   m6=Generalized partial correlation coefficient or
+   GPCC by Vinod (2021)<doi:10.1007/s10614-021-10190-x> and
+   Vinod (2022)<https://www.mdpi.com/1911-8074/15/1/32>.
+   m7= a generalization of psychologists' effect size incorporating 
+   nonlinearity and many variables.
+   m8= local linear partial (dy/dxi) using the 'np' package for kernel 
+   regressions.
+   m9= partial (dy/dxi) using the 'NNS' package.
+   m10= importance measure using the 'NNS' boost function.
+   m11= Shapley Value measure of importance (cooperative game theory).
+   m12 and m13= two versions of the random forest algorithm.
+License: GPL (>= 2)
+RoxygenNote: 7.2.3
+NeedsCompilation: no
+Packaged: 2023-02-16 20:40:15 UTC; vinod
+Author: Hrishikesh Vinod [aut, cre]
+Maintainer: Hrishikesh Vinod <vinod@fordham.edu>
+Repository: CRAN
+Date/Publication: 2023-02-17 10:00:09 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,17 @@
+92c1f695ab61b5e80990cdd319bfc34d *DESCRIPTION
+a32fe68dcc5cbc8ce3fca00bd5233ceb *NAMESPACE
+af0760a4e6931ee6d4eba7dfada61307 *R/effSizCut.R
+88191dcd47435a1b93d49d308d7380b4 *R/fncut.R
+bb793c2f927ef0f65325c1f5bb34cff6 *R/pracSig13.R
+4d3098a8fc36f18af32bbfa8a01e0651 *R/reportRank.R
+9d749b214937d8705a43ebf337b5b90d *R/shapleyvalue.R
+cb4924f3573383fca290a0a8e2ca5294 *build/partial.rdb
+413069f2e65b506d9ce5f353d3dde1b2 *build/vignette.rds
+9444c4cba73807f33dfd5c504eae8bd5 *inst/doc/practicalSigni-vignette.pdf
+d245490b43d80a01ad113320d14427c7 *inst/doc/practicalSigni-vignette.pdf.asis
+3c34399a2a413e3326a47930a7194bf7 *man/effSizCut.Rd
+800d600966d85d4a64388adf63dae6a4 *man/fncut.Rd
+7a69afc9d5d2cd34d022dff7ba99ab1a *man/pracSig13.Rd
+52f100cc0eb342987faa543ec8e403f0 *man/reportRank.Rd
+cd2e8d19432223a68fb2cb9eddcf6949 *man/shapleyvalue.Rd
+d245490b43d80a01ad113320d14427c7 *vignettes/practicalSigni-vignette.pdf.asis
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,25 @@
+# Generated by roxygen2: do not edit by hand
+
+export(effSizCut)
+export(fncut)
+export(pracSig13)
+export(reportRank)
+export(shapleyvalue)
+importFrom(NNS,NNS.boost)
+importFrom(NNS,dy.d_)
+importFrom(generalCorr,depMeas)
+importFrom(generalCorr,kern)
+importFrom(generalCorr,kern2)
+importFrom(generalCorr,parcorVec)
+importFrom(np,npreg)
+importFrom(np,npregbw)
+importFrom(randomForest,importance)
+importFrom(randomForest,randomForest)
+importFrom(stats,coef)
+importFrom(stats,cor)
+importFrom(stats,lm)
+importFrom(stats,median)
+importFrom(stats,residuals)
+importFrom(stats,var)
+importFrom(utils,combn)
+importFrom(xtable,xtable)
diff --git a/R/effSizCut.R b/R/effSizCut.R
@@ -0,0 +1,84 @@
+#' Compute Effect Sizes for continuous or categorical data
+#'
+#' Psychologists' so-called "effect size" reveals
+#' the practical significance of only one
+#' regressor. This function generalizes their algorithm
+#' to two or more regressors (p>=2). Generalization first
+#' converts the xi regressor into a categorical treatment variable
+#' with only two categories. One imagines that observations
+#' larger than the
+#' median (xit> median(xi)) are "treated," and those
+#' below the median are "untreated."
+#' The aim is to measure the size of the
+#' (treatment) effect of (xi) on y. Denote other variables
+#' with postscript "o" as (xo). Since we have p regressors in
+#' our multiple regression, we need to remove the nonlinear
+#' kernel regression effect of
+#' other variables (xo) on y while focusing on the effect of xi.
+#' There are two options for treating (xo): (i) leaving xo
+#' as they are in the data, or (ii) converting xo to binary
+#' at the median. One chooses the first option (i) by setting the
+#' logical argument ane=TRUE when calling the function.
+#' ane=TRUE is the default. Set ane=FALSE for the second option.
+#'
+#' @param y { (T x 1) vector of dependent variable data values}
+#' @param bigx { (T x p) data matrix of xi regressor variables associated
+#'  with the regression}
+#' @param ane {logical variable controls the treatment of other regressors.
+#'  If ane=TRUE (default), other regressors are used in kernel regression
+#'  without forcing them to be binary variables. When ane=FALSE,
+#'  the kernel regression removes the effect of other regressors
+#'  when other regressors are also binary type categorical variables.}
+#' @return out vector of length p holding the t-statistic of each of the p regressors
+#' @note The aim is to answer the following question.
+#' Which regressor has the largest
+#' effect on the dependent variable? We assume that the signs
+#' of regressors are already adjusted such that a numerically
+#' larger effect size suggests that the corresponding regressor
+#' is most important, having the largest effect size in explaining
+#' y the dependent variable.
+#' @author Prof. H. D. Vinod, Economics Dept., Fordham University, NY
+#' @seealso \code{\link{pracSig13}}
+#' @importFrom generalCorr kern
+#' @examples
+#' set.seed(9)
+#'  y=sample(1:15,replace = TRUE)
+#'  x1=sample(2:16, replace = TRUE)
+#'  x2=sample(3:17, replace = TRUE)
+#' effSizCut(y,bigx=cbind(x1,x2),ane=TRUE)
+#'
+#' @export
+
+
+effSizCut <- function(y, bigx, ane = TRUE) {
+  # Returns one statistic per column of bigx: the mean of the kernel-fitted
+  # y over the rows where that column lies above its median, minus the mean
+  # of a reference series over the same rows, divided by denom (see below).
+  #
+  # NCOL() treats a plain vector as one column but apply() requires dims,
+  # so promote a vector to a one-column matrix before splitting.
+  if (is.null(dim(bigx))) bigx <- as.matrix(bigx)
+  p <- NCOL(bigx)
+  out <- rep(NA, p)
+  bigx2 <- apply(bigx, 2, fncut)       # each column split to 0/1 at its median
+  logi <- apply(bigx2, 2, as.logical)  # TRUE marks rows above the median
+  for (i in seq_len(p)) {
+    # Fitted y from a kernel regression on the binarized xi
+    # (fitted = y - residual).
+    kxi <- generalCorr::kern(dep.y = y, reg.x = bigx2[, i], residuals = TRUE)
+    xihat <- y - residuals(kxi)
+    if (ane) {
+      # Reference series: the overall mean of y repeated for every row.
+      xotherhat <- rep(mean(y, na.rm = TRUE), length(y))
+    } else {
+      # Reference series: kernel fit of y on the remaining columns of bigx.
+      # NOTE(review): the roxygen header says ane=TRUE keeps the other
+      # regressors as they are and ane=FALSE binarizes them, whereas this
+      # code uses no other regressors for ane=TRUE and the raw
+      # (non-binarized) ones for ane=FALSE. Behaviour is left unchanged
+      # here; confirm which of the two is intended.
+      kother <- generalCorr::kern(dep.y = y, reg.x = bigx[, -i],
+                                  residuals = TRUE)
+      xotherhat <- y - residuals(kother)
+    }
+    mylogi <- logi[, i]
+    myxi <- xihat[mylogi]
+    myxo <- xotherhat[mylogi]  # o = other
+    xibar <- mean(myxi, na.rm = TRUE)
+    xobar <- mean(myxo, na.rm = TRUE)
+    xivar <- var(myxi, na.rm = TRUE)
+    xovar <- var(myxo, na.rm = TRUE)
+    tim1 <- length(myxi) - 1  # Ti - 1, the selected-row count minus one
+    # Decide denom afresh on every pass. It used to persist across
+    # iterations: it was undefined on the first pass when neither variance
+    # was tiny, and once set to 1 it stayed 1 for all later columns.
+    # A variance below 10e-9 (i.e. 1e-8) is treated as zero, in which case
+    # the raw difference of means is reported (denom = 1).
+    if (xivar < 10e-9 || xovar < 10e-9) {
+      denom <- 1
+    } else {
+      denom <- sqrt(xivar / tim1 + xovar / tim1)
+    }
+    out[i] <- (xibar - xobar) / denom
+  }
+  out
+}
+
diff --git a/R/fncut.R b/R/fncut.R
@@ -0,0 +1,23 @@
+#'  fncut auxiliary converts continuous data into two categories
+#'
+#'
+#'  This is an internal function of the R package practicalSigni.
+#'  Psychologists use effect size to evaluate the practical
+#'  importance of a treatment on a dependent variable using
+#'  a binary (0 or 1) variable.  Assuming numerical data, we
+#'  can always compute the median and regard values less than or equal to the
+#'  median as zero and other values as unity.
+#'
+#' @param x {numerical vector of data values}
+#' @return x {vector of zeros and ones split at the median}
+#' @importFrom stats median
+#' @author Prof. H. D. Vinod, Fordham University, NY
+#'
+#'
+#' @export
+
+fncut <- function(x) {
+  # Split a numeric vector at its median: values <= median become 0,
+  # values > median become 1. NA entries are left as NA.
+  #
+  # The median skips NAs; otherwise a single NA makes the median NA and
+  # the vector is handed back without being split at all.
+  mix <- median(x, na.rm = TRUE)
+  # Classify against the ORIGINAL values before overwriting anything.
+  # Writing the zeros first and then testing x > mix on the modified
+  # vector turns those zeros into ones whenever the median is negative.
+  above <- x > mix
+  # which() drops NA positions, so missing values are never assigned to.
+  x[which(!above)] <- 0
+  x[which(above)] <- 1
+  x
+}