Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
hdvinod authored and cran-robot committed Feb 17, 2023
0 parents commit 3ce23a8
Show file tree
Hide file tree
Showing 18 changed files with 1,029 additions and 0 deletions.
36 changes: 36 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,36 @@
Package: practicalSigni
Type: Package
Title: Practical Significance Ranking of Regressors
Version: 0.1.0
Date: 2023-02-16
Authors@R: c(person("Hrishikesh", "Vinod", role = c("aut", "cre"),
email= "vinod@fordham.edu"))
Encoding: UTF-8
Depends: R (>= 4.2.0), np (>= 0.60), xtable (>= 1.8), generalCorr (>=
1.2), NNS (>= 0.9), randomForest (>= 4.7),
Suggests: R.rsp
VignetteBuilder: R.rsp
Description: Consider a possibly nonlinear nonparametric regression
with p regressors. We provide evaluations by 13 methods to rank
regressors by their practical significance or importance using
various methods, including machine learning tools. Comprehensive
methods are as follows.
m6=Generalized partial correlation coefficient or
GPCC by Vinod (2021)<doi:10.1007/s10614-021-10190-x> and
Vinod (2022)<https://www.mdpi.com/1911-8074/15/1/32>.
m7= a generalization of psychologists' effect size incorporating
nonlinearity and many variables.
m8= local linear partial (dy/dxi) using the 'np' package for kernel
regressions.
m9= partial (dy/dxi) using the 'NNS' package.
m10= importance measure using the 'NNS' boost function.
m11= Shapley Value measure of importance (cooperative game theory).
m12 and m13= two versions of the random forest algorithm.
License: GPL (>= 2)
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-02-16 20:40:15 UTC; vinod
Author: Hrishikesh Vinod [aut, cre]
Maintainer: Hrishikesh Vinod <vinod@fordham.edu>
Repository: CRAN
Date/Publication: 2023-02-17 10:00:09 UTC
17 changes: 17 additions & 0 deletions MD5
@@ -0,0 +1,17 @@
92c1f695ab61b5e80990cdd319bfc34d *DESCRIPTION
a32fe68dcc5cbc8ce3fca00bd5233ceb *NAMESPACE
af0760a4e6931ee6d4eba7dfada61307 *R/effSizCut.R
88191dcd47435a1b93d49d308d7380b4 *R/fncut.R
bb793c2f927ef0f65325c1f5bb34cff6 *R/pracSig13.R
4d3098a8fc36f18af32bbfa8a01e0651 *R/reportRank.R
9d749b214937d8705a43ebf337b5b90d *R/shapleyvalue.R
cb4924f3573383fca290a0a8e2ca5294 *build/partial.rdb
413069f2e65b506d9ce5f353d3dde1b2 *build/vignette.rds
9444c4cba73807f33dfd5c504eae8bd5 *inst/doc/practicalSigni-vignette.pdf
d245490b43d80a01ad113320d14427c7 *inst/doc/practicalSigni-vignette.pdf.asis
3c34399a2a413e3326a47930a7194bf7 *man/effSizCut.Rd
800d600966d85d4a64388adf63dae6a4 *man/fncut.Rd
7a69afc9d5d2cd34d022dff7ba99ab1a *man/pracSig13.Rd
52f100cc0eb342987faa543ec8e403f0 *man/reportRank.Rd
cd2e8d19432223a68fb2cb9eddcf6949 *man/shapleyvalue.Rd
d245490b43d80a01ad113320d14427c7 *vignettes/practicalSigni-vignette.pdf.asis
25 changes: 25 additions & 0 deletions NAMESPACE
@@ -0,0 +1,25 @@
# Generated by roxygen2: do not edit by hand

export(effSizCut)
export(fncut)
export(pracSig13)
export(reportRank)
export(shapleyvalue)
importFrom(NNS,NNS.boost)
importFrom(NNS,dy.d_)
importFrom(generalCorr,depMeas)
importFrom(generalCorr,kern)
importFrom(generalCorr,kern2)
importFrom(generalCorr,parcorVec)
importFrom(np,npreg)
importFrom(np,npregbw)
importFrom(randomForest,importance)
importFrom(randomForest,randomForest)
importFrom(stats,coef)
importFrom(stats,cor)
importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,residuals)
importFrom(stats,var)
importFrom(utils,combn)
importFrom(xtable,xtable)
84 changes: 84 additions & 0 deletions R/effSizCut.R
@@ -0,0 +1,84 @@
#' Compute Effect Sizes for continuous or categorical data
#'
#' Psychologists' so-called "effect size" reveals
#' the practical significance of only one
#' regressor. This function generalizes their algorithm
#' to two or more regressors (p>=2). Generalization first
#' converts the xi regressor into a categorical treatment variable
#' with only two categories. One imagines that observations
#' larger than the
#' median (xit> median(xi)) are "treated," and those
#' below the median are "untreated."
#' The aim is to measure the size of the
#' (treatment) effect of (xi) on y. Denote other variables
#' with postscript "o" as (xo). Since we have p regressors in
#' our multiple regression, we need to remove the nonlinear
#' kernel regression effect of
#' other variables (xo) on y while focusing on the effect of xi.
#' There are two options in treating (xo) (i) letting xo be
#' as they are in the data (ii) converting xo to binary
#' at the median. One chooses the first option (i) by setting the
#' logical argument ane=TRUE in calling the function.
#' ane=TRUE is the default. Set ane=FALSE for the second option.
#'
#' @param y { (T x 1) vector of dependent variable data values}
#' @param bigx { (T x p) data matrix of xi regressor variables associated
#' with the regression}
#' @param ane {logical variable controls the treatment of other regressors.
#' If ane=TRUE (default), other regressors are used in kernel regression
#' without forcing them to be binary variables. When ane=FALSE,
#' the kernel regression removes the effect of other regressors
#' when other regressors are also binary type categorical variables.}
#' @return out vector with p values of t-statistics for p regressors
#' @note The aim is to answer the following question.
#' Which regressor has the largest
#' effect on the dependent variable? We assume that the signs
#' of regressors are already adjusted such that a numerically
#' larger effect size suggests that the corresponding regressor
#' is most important, having the largest effect size in explaining
#' y the dependent variable.
#' @author Prof. H. D. Vinod, Economics Dept., Fordham University, NY
#' @seealso \code{\link{pracSig13}}
#' @importFrom generalCorr kern
#' @examples
#' set.seed(9)
#' y=sample(1:15,replace = TRUE)
#' x1=sample(2:16, replace = TRUE)
#' x2=sample(3:17, replace = TRUE)
#' effSizCut(y,bigx=cbind(x1,x2),ane=TRUE)
#'
#' @export


effSizCut <- function(y, bigx, ane = TRUE) {
  # Generalized "effect size": returns one t-type statistic per column of bigx.
  #   y    : vector of dependent-variable values
  #   bigx : matrix whose columns are the regressors
  #   ane  : TRUE  -> comparison fit is the constant mean(y);
  #          FALSE -> comparison fit is a kernel regression of y on the
  #                   remaining columns of bigx
  # NOTE(review): the roxygen text describes `ane` in terms of leaving the
  # other regressors "as they are" vs. making them binary, which is not what
  # the two branches below do -- confirm the intended behaviour.

  # Variances below this value are treated as zero (same cutoff as before).
  tiny <- 10e-9

  p <- NCOL(bigx)
  out <- rep(NA, p)

  # Split every regressor at its median (fncut yields 0/1); the logical copy
  # flags the "treated" rows, i.e. those above the column median.
  bigx2 <- apply(bigx, 2, fncut)
  logi <- apply(bigx2, 2, as.logical)

  for (i in seq_len(p)) {
    # Kernel fit of y on the binarized xi; fitted values = y - residuals.
    kxi <- generalCorr::kern(dep.y = y, reg.x = bigx2[, i], residuals = TRUE)
    xihat <- y - residuals(kxi)

    if (ane) {
      xotherhat <- rep(mean(y, na.rm = TRUE), length(y))
    } else {
      kother <- generalCorr::kern(
        dep.y = y, reg.x = bigx[, -i], residuals = TRUE
      )
      xotherhat <- y - residuals(kother)
    }

    # Restrict both sets of fitted values to the treated rows of regressor i.
    treated <- logi[, i]
    myxi <- xihat[treated]
    myxo <- xotherhat[treated] # o = other

    xibar <- mean(myxi, na.rm = TRUE)
    xivar <- var(myxi, na.rm = TRUE)
    xobar <- mean(myxo, na.rm = TRUE)
    xovar <- var(myxo, na.rm = TRUE)
    tim1 <- length(myxi) - 1 # Ti - 1

    # The denominator is decided afresh for every regressor. Previously it
    # was only ever assigned conditionally, so it was undefined on the first
    # pass when both variances were non-zero, and a value of 1 left over from
    # an earlier regressor suppressed the computation for later ones.
    if (xivar < tiny || xovar < tiny) {
      denom <- 1 # (near-)zero variance: report the raw mean difference
    } else {
      denom <- sqrt(xivar / tim1 + xovar / tim1)
    }

    out[i] <- (xibar - xobar) / denom
  } # end for loop
  out
}

23 changes: 23 additions & 0 deletions R/fncut.R
@@ -0,0 +1,23 @@
#' fncut auxiliary converts continuous data into two categories
#'
#'
#' This is an internal function of the R package practicalSigni.
#' Psychologists use effect size to evaluate the practical
#' importance of a treatment on a dependent variable using
#' a binary [0,1] variable. Assuming numerical data, we
#' can always compute the median and regard values < or = the
#' median as zero and other values as unity.
#'
#' @param x {numerical vector of data values}
#' @return x {vector of zeros and ones split at the median}
#' @importFrom stats median
#' @author Prof. H. D. Vinod, Fordham University, NY
#'
#'
#' @export

fncut <- function(x) {
  # Convert numeric data into a 0/1 indicator split at the median:
  # values <= median become 0, values > median become 1.
  # Missing values are ignored when locating the median and stay NA in the
  # result. Names/dimensions of x are kept because x is modified in place.
  cutoff <- median(x, na.rm = TRUE)

  # Evaluate the comparison once, on the untouched data. Re-testing x after
  # the zeros have been written would misclassify them as "above the median"
  # whenever the median is negative (0 > median), turning every entry into 1.
  above <- x > cutoff

  x[!above] <- 0
  x[above] <- 1
  x
}

0 comments on commit 3ce23a8

Please sign in to comment.