version 2011.03-1

cran · Mar 2, 2011 · 2daf182 · 2daf182
1 parent 2eebedf
commit 2daf182
Show file tree

Hide file tree

Showing 34 changed files with 486 additions and 209 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -0,0 +1,18 @@
+CITAN PACKAGE CHANGELOG
+
+
+2011.03-1  /2011-03-02/
+	* pareto2.htest.approx() - new function: Two-sample asymptotic h-test
+	     for equality of shape parameters for Type II-Pareto distributions
+	     with known common scale parameter
+	* pareto2.mlekestimate() - new function to estimate the shape parameter
+	     of the Pareto-type II distribution using unbiased MLE
+	* pareto2.goftest()      - params 'k' and 's' added
+	* index.rp(), index.lp() - now checking param 'p' for possible flaws
+	* pareto2.zsestimate()   - now checking param 'x' for possible flaws
+	* All functions using the Pareto-type II distribution now accept
+	     scale parameter s>0
+	* minor bug fixes
+
+2011.02-1  /2011-02-28/
+	* initial public release
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,20 +1,21 @@
 Package: CITAN
 Type: Package
-Version: 2011.02-1
-Date: 2011-02-28
+Version: 2011.03-1
+Date: 2011-03-02
 License: LGPL (>= 3)
 Depends: R (>= 2.8.0), ADGofTest
 Title: CITation ANalysis toolpack
 Author: Marek Gagolewski <gagolews@ibspan.waw.pl>
 Description: CITAN is a library of functions useful in - but not
         limited to - quantitative research in the field of
-        scientometrics.
+        scientometrics. It also contains various methods for dealing
+        with the Pareto-type II distribution.
 Maintainer: Marek Gagolewski <gagolews@ibspan.waw.pl>
 Collate: 'CITAN-package.R' 'distrib_hirsch.R' 'distrib_p2.R'
         'distrib_sstat.R' 'impact_functions.R' 'impact_visualization.R'
         'p2.confint.h.R' 'p2.confint.rho.approx.R' 'p2.confint.rho.R'
-        'p2.ftest.R' 'p2.goftest.R' 'p2.htest.R' 'p2.zsestimate.R'
-        'rho.R'
-Packaged: 2011-02-27 23:02:29 UTC; gagolews
+        'p2.estimate.R' 'p2.ftest.R' 'p2.goftest.R' 'p2.htest.approx.R'
+        'p2.htest.R' 'rho.R'
+Packaged: 2011-03-02 21:33:40 UTC; gagolews
 Repository: CRAN
-Date/Publication: 2011-02-28 09:27:32
+Date/Publication: 2011-03-04 07:15:32
diff --git a/NAMESPACE b/NAMESPACE
@@ -22,8 +22,10 @@ export(pareto2.confint.rho.approx)
 export(pareto2.confint.rho.upper)
 export(pareto2.confint.rho.lower)
 export(pareto2.confint.rho)
+export(pareto2.zsestimate)
+export(pareto2.mlekestimate)
 export(pareto2.ftest)
 export(pareto2.goftest)
+export(pareto2.htest.approx)
 export(pareto2.htest)
-export(pareto2.zsestimate)
 export(rho.get)
diff --git a/R/CITAN-package.R b/R/CITAN-package.R
@@ -1,7 +1,6 @@
-#' CITation ANalysis toolpack.
-#'
 #' CITAN is a library of functions useful in --- but not limited to ---
 #' quantitative research in the field of scientometrics.
+#' It also contains various methods for dealing with the Pareto-type II distribution.
 #'
 #' Fair and objective assessment methods of individual scientists
 #' have become the focus of scientometricians' attention since the
@@ -64,8 +63,10 @@
 #'    for general functions dealing with the Pareto distribution of the second kind,
 #'    including the c.d.f., p.d.f., quantiles and random deviates,
 #'    \item \code{\link{pareto2.htest}} --- two-sample \eqn{h}-test for equality of shape parameters based on the difference of \eqn{h}-indices,
+#'    \item \code{\link{pareto2.htest.approx}} --- two-sample asymptotic (approximate) \eqn{h}-test,
 #'    \item \code{\link{pareto2.ftest}} --- two-sample exact F-test for equality of shape parameters,
 #'    \item \code{\link{pareto2.zsestimate}} --- estimation of parameters using the Bayesian method (MMSE) developed by Zhang and Stevens (2009),
+#'    \item \code{\link{pareto2.mlekestimate}} --- estimation of shape parameter using the unbiased MLE,
 #'    \item \code{\link{pareto2.goftest}} --- goodness-of-fit tests,
 #'    \item \code{\link{pareto2.confint.rho}} and \code{\link{pareto2.confint.rho.approx}} --- exact and approximate (asymptotic)
 #'    confidence intervals for the \eqn{\kappa}-index based on S-statistics,
@@ -80,17 +81,19 @@
 #'
 #'
 #' Please feel free to send any comments and suggestions (e.g.
-#' to include some new bibliometric impact indices) to the author.
+#' to include some new bibliometric impact indices) to the author
+#' (see also \url{http://www.ibspan.waw.pl/~gagolews}).
 #'
 #' For a complete list of functions, use \code{library(help="CITAN")}.
 #'
+#' Keywords: Hirsch's h-index, Egghe's g-index, L-statistics, S-statistics, bibliometrics, scientometrics, informetrics,
+#' webometrics, aggregation operators, impact functions, impact assessment.
+#'
 #' @name CITAN-package
 #' @aliases CITAN
 #' @docType package
 #' @title CITation ANalysis toolpack
 #' @author Marek Gagolewski \email{gagolews@@ibspan.waw.pl}
-#' @keywords Hirsch h-index, Egghe g-index, L-statistics, S-statistics, bibliometrics, scientometrics, informetrics,
-#' webometrics, aggregation operators, impact functions, impact assessment
 #' @references
 #' Dubois D., Prade H., Testemale C., Weighted fuzzy pattern matching, Fuzzy Sets and Systems 28, 313-331, 1988.\cr
 #' Egghe L., Theory and practise of the g-index, Scientometrics 69(1), 131-152, 2006.\cr

diff --git a/R/distrib_p2.R b/R/distrib_p2.R
@@ -1,13 +1,13 @@
 #' Random deviates generation for the Pareto Type-II (Lomax)  distribution with shape
-#' parameter equal to \eqn{k>0} and scale parameter equal to \eqn{s\ge 1}.
+#' parameter equal to \eqn{k>0} and scale parameter equal to \eqn{s>0}.
 #'
 #' @title Pareto distribution of the second kind - random deviates
 #' @param n number of observations.
 #' @param k vector of shape parameters, \eqn{k>0}.
-#' @param s vector of scale parameters, \eqn{s\ge 1}.
+#' @param s vector of scale parameters, \eqn{s>0}.
 #' @return The function returns generated pseudorandom deviates.
 #' @export
-#' @seealso \code{\link{dpareto2}}, \code{\link{ppareto2}}, \code{\link{qpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
+#' @seealso \code{\link{dpareto2}}, \code{\link{ppareto2}}, \code{\link{qpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.mlekestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
 rpareto2 <- function(n, k=1, s=1)
 {
 	s*((runif(n)^(-1/k)) - 1);
@@ -16,17 +16,17 @@ rpareto2 <- function(n, k=1, s=1)
 
 #' Cumulative distribution function
 #' for the Pareto Type-II (Lomax)  distribution with shape parameter equal to \eqn{k>0}
-#' and scale parameter equal to \eqn{s\ge 1}.
+#' and scale parameter equal to \eqn{s>0}.
 #'
 #' The c.d.f. at \eqn{x\ge 0} is given by \code{F(x)=1-s^k/(s+x)^k}.
 #'
 #' @title Pareto distribution of the second kind - c.d.f.
 #' @param q vector of quantiles.
 #' @param k vector of shape parameters, \eqn{k>0}.
-#' @param s vector of scale parameters, \eqn{s\ge 1}.
+#' @param s vector of scale parameters, \eqn{s>0}.
 #' @return The function gives the c.d.f.
 #' @export
-#' @seealso \code{\link{dpareto2}}, \code{\link{qpareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
+#' @seealso \code{\link{dpareto2}}, \code{\link{qpareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.mlekestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
 ppareto2 <- function(q, k=1, s=1)
 {
 	ifelse(q<0, 0, (1-(s/(s+q))^k));
@@ -35,33 +35,33 @@ ppareto2 <- function(q, k=1, s=1)
 
 #' Quantile function
 #' for the Pareto Type-II (Lomax) distribution with shape parameter equal to \eqn{k>0}
-#' and scale parameter equal to \eqn{s\ge 1}.
+#' and scale parameter equal to \eqn{s>0}.
 #'
 #' @title Pareto distribution of the second kind - quantiles
 #' @param p vector of probabilities, \eqn{p\in(0,1)}.
 #' @param k vector of shape parameters, \eqn{k>0}.
-#' @param s vector of scale parameters, \eqn{s\ge 1}.
+#' @param s vector of scale parameters, \eqn{s>0}.
 #' @return The function gives the theoretical quantiles.
 #' @export
-#' @seealso \code{\link{dpareto2}}, \code{\link{ppareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
+#' @seealso \code{\link{dpareto2}}, \code{\link{ppareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.mlekestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
 qpareto2 <- function(p, k=1, s=1)
 {
 	ifelse(p<0 | p>1, NA, s*((1-p)^(-1/k)-1));
 }
 
 
 #' Density function for the Pareto Type-II (Lomax)  distribution with shape parameter
-#' equal to \eqn{k>0} and scale parameter equal to \eqn{s\ge 1}.
+#' equal to \eqn{k>0} and scale parameter equal to \eqn{s>0}.
 #'
 #' The p.d.f. at \eqn{x\ge 0} is given by \code{f(x)=k*s^k/(s+x)^(k+1)}.
 #'
 #' @title Pareto distribution of the second kind - density
 #' @param x vector of quantiles.
 #' @param k vector of shape parameters, \eqn{k>0}.
-#' @param s vector of scale parameters, \eqn{s\ge 1}.
+#' @param s vector of scale parameters, \eqn{s>0}.
 #' @return The function gives the density.
 #' @export
-#' @seealso \code{\link{ppareto2}}, \code{\link{qpareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
+#' @seealso \code{\link{ppareto2}}, \code{\link{qpareto2}}, \code{\link{rpareto2}}, \code{\link{pareto2.zsestimate}}, \code{\link{pareto2.mlekestimate}}, \code{\link{pareto2.goftest}}, \code{\link{pareto2.ftest}}
 dpareto2 <- function(x, k=1, s=1)
 {
 	ifelse(x<=0, 0, k/(s+x)*(s/(s+x))^k);

diff --git a/R/impact_functions.R b/R/impact_functions.R
@@ -20,7 +20,10 @@
 #' @param disable.check logical; \code{TRUE} to disable some validity checks on the input vector; defaults \code{FALSE}.
 #' @param algorithm type of algorithm, "linear-time" or "log-time" (default).
 #' @return The function returns a single number or NA if improper input has been given.
-#' @seealso \code{\link{index.g}}, \code{\link{index.rp}}, \code{\link{index.lp}}, \code{\link{Sstat}}, \code{\link{Sstat2}}, \code{\link{phirsch}}, \code{\link{dhirsch}}, \code{\link{pareto2.confint.h}}, \code{\link{pareto2.htest}}
+#' @seealso
+#' \code{\link{index.g}}, \code{\link{index.rp}}, \code{\link{index.lp}}, \code{\link{Sstat}}, \code{\link{Sstat2}},
+#' \code{\link{phirsch}}, \code{\link{dhirsch}}, \code{\link{pareto2.confint.h}}, \code{\link{pareto2.htest}},
+#' \code{\link{pareto2.htest.approx}}
 #'
 #' @examples
 #' authors <- list(  # a list of numeric sequences
@@ -149,14 +152,14 @@ index.rp <- function(x, p=Inf, sorted.dec=FALSE, disable.check=FALSE)
 		x <- x[!is.na(x)];
 	}
 
+	if (mode(p) != "numeric" || length(p)!=1 || p < 1) stop("'p' should be a single numeric value >= 1");
+
 	if (!sorted.dec)
 		x <- sort(x, decreasing=TRUE);
 
-	if (p < 1)
-	{
-		return(NA);
-	} else if (is.finite(p))
+	if (is.finite(p))
 	{
+		if (p > 50) warning("'p' is quite large. possible accuracy problems. maybe you should try 'p'==Inf?");
 		.C("index_rp_finite", as.double(x), as.integer(length(x)), as.double(p), out=double(1), DUP=FALSE, PACKAGE="CITAN")$out;
 	} else
 	{
@@ -215,14 +218,14 @@ index.lp <- function(x, p=Inf, sorted.dec=FALSE, disable.check=FALSE)
 		x <- x[!is.na(x)];
 	}
 
+	if (mode(p) != "numeric" || length(p)!=1 || p < 1) stop("'p' should be a single numeric value >= 1");
+
 	if (!sorted.dec)
 		x <- sort(x, decreasing=TRUE);
 
-	if (p < 1)
-	{
-		return(NA);
-	} else if (is.finite(p))
+	if (is.finite(p))
 	{
+		if (p > 50) warning("'p' is quite large. possible accuracy problems. maybe you should try 'p'==Inf?");
 		.C("index_lp_finite", as.double(x), as.integer(length(x)), as.double(p), integer(length(x)+1), out=double(2), DUP=FALSE, PACKAGE="CITAN")$out;
 	} else
 	{

diff --git a/R/p2.confint.h.R b/R/p2.confint.h.R
@@ -1,6 +1,6 @@
 #' Computes the exact two-sided confidence interval for the theoretical \eqn{h}-index
 #' of a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #'
 #' The confidence interval is based on the method \code{\link{pareto2.confint.rho}}.
 #'
@@ -15,7 +15,7 @@
 #'
 #' @title Two-sided exact confidence interval for the theoretical h-index
 #' @param h observed value of the \eqn{h}-index
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Vector of length 2 with the computed bounds of the confidence interval.
@@ -26,6 +26,8 @@ pareto2.confint.h <- function(h, s, n, conf.level=0.95)
 	if (length(h) != 1 || h < 0 || h > n)
 		stop("Incorrect h value!");
 
+	if (mode(s) != "numeric" || length(s) != 1 || s <= 0) stop("'s' should be > 0");
+
 	kappa    <- function(x) { pmax(0,pmin(1,x))*gamma; }
 	gamma <- 1-conf.level;
 	h <- round(h);

diff --git a/R/p2.confint.rho.R b/R/p2.confint.rho.R
@@ -87,7 +87,7 @@
 
 #' Computes the exact right-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -101,7 +101,7 @@
 #' @title Right-sided exact confidence interval for the kappa-index
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Upper bound of the confidence interval.
@@ -116,6 +116,8 @@ pareto2.confint.rho.upper <- function(v, kappa, s, n, conf.level=0.95)
 
 	gamma <- 1-conf.level;
 
+	if (mode(s) != "numeric" || length(s) != 1 || s <= 0) stop("'s' should be > 0");
+
 	if (!is.numeric(v) || length(v) != 1)
 		stop("v must be a single numeric value")
 
@@ -141,7 +143,7 @@ pareto2.confint.rho.upper <- function(v, kappa, s, n, conf.level=0.95)
 
 #' Computes the exact left-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -155,7 +157,7 @@ pareto2.confint.rho.upper <- function(v, kappa, s, n, conf.level=0.95)
 #' @title Left-sided exact confidence interval for the kappa-index
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Lower bound of the confidence interval.
@@ -171,6 +173,8 @@ pareto2.confint.rho.lower <- function(v, kappa, s, n, conf.level=0.95)
 
 	gamma <- 1-conf.level;
 
+	if (mode(s) != "numeric" || length(s) != 1 || s <= 0) stop("'s' should be > 0");
+
 	if (!is.numeric(v) || length(v) != 1)
 		stop("v must be a single numeric value")
 
@@ -198,7 +202,7 @@ pareto2.confint.rho.lower <- function(v, kappa, s, n, conf.level=0.95)
 
 #' Computes the exact two-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -212,7 +216,7 @@ pareto2.confint.rho.lower <- function(v, kappa, s, n, conf.level=0.95)
 #' @title Two-sided exact confidence interval for the kappa-index
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Vector of length 2 with the computed bounds of the confidence interval.

diff --git a/R/p2.confint.rho.approx.R b/R/p2.confint.rho.approx.R
@@ -1,6 +1,6 @@
 #' Computes the approximate (asymptotic) left-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -15,7 +15,7 @@
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
 #' @param kappaInvDer the derivative of the inverse of \eqn{\kappa}.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Lower bound of the confidence interval.
@@ -30,6 +30,8 @@ pareto2.confint.rho.approx.lower <- function(v, kappa, kappaInvDer, s, n, conf.l
 	if (!is.numeric(v) || length(v) != 1)
 		stop("v must be a single numeric value");
 
+	if (mode(s) != "numeric" || length(s) != 1 || s <= 0) stop("'s' should be > 0");
+
 	if (v < 1e-6) return(0.0);
 # 	if (v > 1-1e-13) return (1.0);
 	if (gamma < 1e-5) return(0.0);
@@ -61,7 +63,7 @@ pareto2.confint.rho.approx.lower <- function(v, kappa, kappaInvDer, s, n, conf.l
 
 #' Computes the approximate (asymptotic) right-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -76,7 +78,7 @@ pareto2.confint.rho.approx.lower <- function(v, kappa, kappaInvDer, s, n, conf.l
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
 #' @param kappaInvDer the derivative of the inverse of \eqn{\kappa}.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Upper bound of the confidence interval.
@@ -91,6 +93,8 @@ pareto2.confint.rho.approx.upper <- function(v, kappa, kappaInvDer, s, n, conf.l
 	if (!is.numeric(v) || length(v) != 1)
 		stop("v must be a single numeric value");
 
+	if (mode(s) != "numeric" || length(s) != 1 || s <= 0) stop("'s' should be > 0");
+
 	if (v > 1-1e-6) return(1.0);
 # 	if (v < 1e-13) return (0.0);
 	if (gamma < 1e-5) return(1.0);
@@ -123,7 +127,7 @@ pareto2.confint.rho.approx.upper <- function(v, kappa, kappaInvDer, s, n, conf.l
 
 #' Computes the approximate (asymptotic) two-sided confidence interval for the kappa-index of
 #' a probability distribution in an \eqn{(X_1,\dots,X_n)} i.i.d. Pareto-type II
-#' model with known scale parameter \eqn{s\ge 1}.
+#' model with known scale parameter \eqn{s>0}.
 #' The confidence interval is based on the observed value
 #' of the S-statistic w.r.t. the given control function \eqn{\kappa}.
 #'
@@ -138,7 +142,7 @@ pareto2.confint.rho.approx.upper <- function(v, kappa, kappaInvDer, s, n, conf.l
 #' @param v observed value of the S-statistic w.r.t. \eqn{\kappa}.
 #' @param kappa an increasing function, \eqn{\kappa}, a so-called control function.
 #' @param kappaInvDer the derivative of the inverse of \eqn{\kappa}.
-#' @param s scale parameter, \eqn{s\ge 1}.
+#' @param s scale parameter, \eqn{s>0}.
 #' @param n sample size.
 #' @param conf.level confidence level; defaults 0.95.
 #' @return Vector of length 2 with the computed bounds of the confidence interval.