Skip to content

Commit

Permalink
version 0.0-21
Browse files Browse the repository at this point in the history
  • Loading branch information
Thoralf Mildenberger authored and gaborcsardi committed Nov 12, 2009
0 parents commit 06389ff
Show file tree
Hide file tree
Showing 11 changed files with 940 additions and 0 deletions.
15 changes: 15 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,15 @@
Package: histogram
Type: Package
Title: Construction of regular and irregular histograms with different
options for automatic choice of bins
Version: 0.0-21
Date: 2009-11-12
Author: Thoralf Mildenberger, Yves Rozenholc, David Zasada.
Maintainer: Thoralf Mildenberger <mildenbe@statistik.tu-dortmund.de>
Description: Automatic construction of regular and irregular histograms
as described in Rozenholc/Mildenberger/Gather (2009).
License: GPL (>= 2)
LazyLoad: yes
Packaged: 2009-11-12 13:19:30 UTC; root
Repository: CRAN
Date/Publication: 2009-11-12 16:02:07
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -0,0 +1,2 @@
export(histogram)

49 changes: 49 additions & 0 deletions R/DynamicExtreme.R
@@ -0,0 +1,49 @@
`DynamicExtreme` <-
function(FctWeight,n,Dmax,mini=TRUE,msg=TRUE) {

###################################################################
# use only for vectorizing the computation
OptimizePath <- function(D) {
ancestor0 = matrix(nrow=n-D+1)
cumWeight0 = matrix(nrow=n-D+1)

for(i in D:n) {
tmp <- cumWeight[(D-1):(i-1),D-1]+weight[D:i,i+1]

if (mini) ancestor0[i-D+1] <- which.min(tmp)
else ancestor0[i-D+1] <- which.max(tmp)
cumWeight0[i-D+1] <- tmp[ancestor0[i-D+1]]
}
ancestor[D:n,D] <<- ancestor0 + (D-2)
cumWeight[D:n,D] <<- cumWeight0
}
###################################################################

if (n==1) return(list(extreme=FctWeight(1,2),ancestor=cbind(0,0) ))

# matrix of extreme values reached in d=1:Dmax steps
cumWeight <- matrix(nrow=n,ncol=Dmax)

# matrix of the ancestors
ancestor <- matrix(0,nrow=n,ncol=Dmax)

# weight matrix for combinatorial
weight <- matrix(nrow=n+1,ncol=n+1)

# first compute all n*(n-1)/2 weights
if (msg) message("- Computing weights for dynamic programming algorithm.")

for(i in 1:n)
weight[i,(i+1):(n+1)] = mapply(FctWeight,rep(i,n-i+1),(i+1):(n+1));

cumWeight[,1] <- weight[1,2:(n+1)]

if (msg) message("- Now performing dynamic optimization.")

mapply(OptimizePath,2:Dmax)
extreme <- cumWeight[n,]


list(extreme=extreme,ancestor=ancestor)
}

7 changes: 7 additions & 0 deletions R/DynamicList.R
@@ -0,0 +1,7 @@
`DynamicList` <-
function(C,B,D) {
L <- PathList(C,D) + 1
bounds <- B[L]
return(bounds)
}

9 changes: 9 additions & 0 deletions R/PathList.R
@@ -0,0 +1,9 @@
`PathList` <-
function(C,D){
L <- nrow(C)
for(i in D:1) {
L <- c(C[L[1],i],L)
}
return(L)
}

44 changes: 44 additions & 0 deletions R/greedy.R
@@ -0,0 +1,44 @@
histgreedy<-function(BL,NL,n,binmax,verbose=verbose){
nB<-length(BL)
if (nB==2) return(BL)

#compute possible increses of likelihood by splitting bins

inclikelihood <- function(i,j) {
if ((i+1)==j) return(NULL)
else if (BL[i]==BL[j]) return(rep(-Inf,j-i-1))
else {
k <- (i+1):(j-1)
old <- (NL[j]-NL[i])*log((NL[j]-NL[i])/(BL[j]-BL[i])/n)
new<-rep(0,length(k))
indv<-(NL[j]-NL[k]>0)
new[indv]<-((NL[j]-NL[k])*log((NL[j]-NL[k])/(BL[j]-BL[k])/n))[indv]
indv<-(NL[k]-NL[i]>0)
new[indv]<-new[indv]+((NL[k]-NL[i])*log((NL[k]-NL[i])/(BL[k]-BL[i])/n))[indv]-old
new
}
}
breaks <- c(1,nB)
# compute increment of likelihood obtained by adding one bin
increment <- c(-Inf,inclikelihood(breaks[1],breaks[2]),-Inf)

while ((max(increment)>0)&&(length(breaks)<(binmax+1))) {
maxi <- which.max(increment)
here <- sum(breaks<maxi)
i <- breaks[here];
j <- breaks[here+1];
debut<-increment[1:i]; debut[i]=-Inf
fin<-increment[j:nB]; fin[1]=-Inf
gche <- inclikelihood(i,maxi)
drte <- inclikelihood(maxi,j)
increment <- c(debut,gche,-Inf,drte,fin)
breaks <- c(breaks[1:here],maxi,breaks[(here+1):length(breaks)])
}

breaks <- BL[breaks]
if (verbose) message(paste('- Pre-selected finest partition with',length(breaks)-1,'bins.'))
return(breaks)
}



73 changes: 73 additions & 0 deletions R/histogram.R
@@ -0,0 +1,73 @@
`histogram` <- function( y, type="combined", grid="data", breaks=NULL, penalty="default", greedy=TRUE, right=TRUE, control=list(), verbose=TRUE, plot=TRUE ) {

# check data vector
if ( length(unique(y))<2 )
stop( "data vector must consist of at least two distinct values!" )

# handle invalid penalty/type combination
penalty = tolower( penalty )
if ( any( penalty==c("br","nml","sc","mdl") ) && ( tolower(type)!="regular" && tolower(type)!="r" ) ) {
warning( "Penalty '", penalty, "' not supported for irregular histograms - creating regular histogram." )
type <- "regular"
}
# handle invalid parameter "breaks"
if ( length(breaks) > 1 ) {
warning( "Breaks is a vector of length ", length(breaks), " - using first value only", call.=FALSE )
breaks = breaks[1]
}
if ( ! is.null(breaks) ) {
breaks <- floor( breaks )
if ( breaks < 2 ) {
warning( "Breaks must be an integer <= 2 - using breaks=2", call.=FALSE )
breaks <- 2
}
}

# histogram type: regular
if ( tolower(type)=="regular" || tolower(type)=="r" )
out<-histogram.regular( y, penalty=penalty, breaks=breaks, control=control, right=right, verbose=verbose, plot=plot )$H

# histogram type: irregular
if ( tolower(type)=="irregular" || tolower(type)=="i" )
out<-histogram.irregular( y, grid=grid, breaks=breaks, penalty=penalty, greedy=greedy, control=control, right=right, verbose=verbose, plot=plot )$H

# histogram type: combined
if ( tolower(type)=="combined" || tolower(type)=="c" ) {

# check penalty-parameter
penalty = tolower( penalty )
if ( ! any( penalty==c("default","pena","penb","penr") ) ) {
warning( "Penalty '", penalty, "' not supported for combined histograms - using default setting for irregular histograms", call.=FALSE )
penalty = "default"
}

if ( verbose )
message( "Choosing between regular and irregular histogram:\n\n1.", appendLF=FALSE )
out1 <- histogram.regular( y, penalty="br", breaks=NULL, control=control, right=right, verbose=verbose, plot=FALSE )
if ( verbose )
message( "2.",appendLF=FALSE )
out2 <- histogram.irregular( y, grid=grid, breaks=NULL, penalty=penalty, greedy=greedy, control=control, right=right, verbose=verbose, plot=FALSE )

#compare maximized likelihood or frgular and irregular histogram

if (out1$lhvalue>=out2$lhvalue) {
out<-out1$H
if (verbose)
message("\nRegular histogram chosen.\n")
}
else {
out<-out2$H
if ( verbose )
message("\nIrregular histogram chosen.\n")
}

if ( plot )
plot(out)
}


if ( verbose )
print( out )

return( invisible(out) )
}

0 comments on commit 06389ff

Please sign in to comment.