new routine for a single matrix fit

HISKP-LQCD · Jun 29, 2013 · 48ffc13 · 48ffc13
1 parent 35708bb
commit 48ffc13
Show file tree

Hide file tree

Showing 4 changed files with 208 additions and 10 deletions.
diff --git a/R/bootstrapnumber.R b/R/bootstrapnumber.R
@@ -1,4 +1,4 @@
-mean.index <- function(data, indexvector) {
+meanindexed <- function(data, indexvector) {
   return(invisible(mean(data[indexvector])))
 }
 
@@ -15,7 +15,7 @@ bootstrap.analysis <- function(data, skip=0, boot.R=100,
   cat("mean value = ", data.mean, "\n")
   cat("naive error = ", error.naive, "\n")
 
-  data.boot <- boot(data=data, statistic=mean.index, R=boot.R, stype="i")
+  data.boot <- boot(data=data, statistic=meanindexed, R=boot.R, stype="i")
   data.boot.ci <- boot.ci(data.boot, type = c("norm", "basic", "perc"))
 
   cat("                  mean        -err           +err            stderr        bias\n")
@@ -41,7 +41,7 @@ bootstrap.analysis <- function(data, skip=0, boot.R=100,
   while((length(data))/boot.l > 20) {
     ndata <- block.ts(data, l=boot.l)
     j <- j+1
-    data.tsboot <- boot(ndata, statistic=mean.index, R=boot.R)
+    data.tsboot <- boot(ndata, statistic=meanindexed, R=boot.R)
     ## use the same seed ...
     set.seed(data.tsboot$seed)
     data.sdboot <- boot(ndata, statistic=sd.index, R=boot.R)

diff --git a/R/matrixfit.R b/R/matrixfit.R
@@ -0,0 +1,121 @@
+bootstrap.meanerror <- function(data, R=400, l=20) {
+  bootit <- boot(block.ts(data, l=l), meanindexed, R=R)
+  return(sd(bootit$t))
+}
+
+matrixChisqr <- function(par, t, y, M, T, parind) {
+  z <- (y-0.5*par[parind[,1]]*par[parind[,2]]*(exp(-par[3]*(T-t)) + exp(-par[3]*t)))
+  return( z %*% M %*% z )
+}
+
+
+matrixfit <- function(cf, t1, t2, symmetrise=TRUE, boot.R=400, boot.l=20, noObs=1,
+                      matrix.size=2, useCov=FALSE) {
+  ##if(symmetrise) {
+  ##  
+  ##}
+  matrix.size <- 2
+  Cor <- apply(cf$cf, 2, mean)
+  Err <- apply(cf$cf, 2, bootstrap.meanerror, R=boot.R, l=boot.l)
+
+  t1p1 <- t1+1
+  t2p1 <- t2+1
+  N <- length(cf$cf[,1])
+  Thalfp1 <- cf$T/2+1
+  t <- c(0:(cf$T/2))
+  CF=data.frame(t=t, Cor=Cor, Err=Err)
+  rm(Cor, Err)
+  ii <- c((t1p1):(t2p1), (t1p1+Thalfp1):(t2p1+Thalfp1), (t1p1+2*Thalfp1):(t2p1+2*Thalfp1), (t1p1+3*Thalfp1):(t2p1+3*Thalfp1))
+  parind <- array(1, dim=c(length(CF$Cor),2))
+  parind[(Thalfp1+1):(2*Thalfp1),2] <- 2
+  parind[(2*Thalfp1+1):(4*Thalfp1),1] <- 2
+  parind[(3*Thalfp1+1):(4*Thalfp1),2] <- 2
+  mSize <- length(CF$Cor)/length(t)
+  if(mSize != matrix.size^2) {
+    stop("matrix is not a square matrix, but we expect a square matrix!")
+  }
+
+  M <- diag(1/CF$Err[ii]^2)
+  CovMatrix <- numeric()
+  if(useCov) {
+    ## compute correlation matrix and compute the correctly normalised inverse
+    ## see C. Michael hep-lat/9412087
+    ## block data first
+    ncf <- block.ts(cf$cf, l=boot.l)
+    ## compute covariance matrix and invert
+    CovMatrix <- cov(ncf[,ii])
+    cov.svd <- svd(CovMatrix)
+    if(floor(sqrt(length(cov.svd$d))) < length(cf$cf[,1])) {
+      cov.svd$d[floor(sqrt(length(cov.svd$d))):length(cov.svd$d)] <- mean(cov.svd$d[floor(sqrt(length(cov.svd$d))):length(cov.svd$d)])
+    }
+    D <- diag(1/cov.svd$d)
+    M <- floor(N/boot.l)*cov.svd$v %*% D %*% t(cov.svd$u)
+  }
+
+  par <- numeric(3)
+  ## we get initial guesses for fit parameters from effective masses
+  par[3] <- -log(CF$Cor[t1p1+1]/CF$Cor[t1p1])
+  par[1] <- sqrt(CF$Cor[t1p1]/0.5/exp(-par[3]*t1))
+  par[2] <- sqrt(CF$Cor[(t1p1+3*Thalfp1)]/0.5/exp(-par[3]*t1))
+
+  opt.res <- optim(par, matrixChisqr, method="BFGS", control=list(maxit=500, parscale=par, REPORT=50),
+                   t=CF$t[ii], y=CF$Cor[ii], M=M, T=cf$T, parind=parind[ii,])
+  opt.res <- optim(opt.res$par, matrixChisqr, method="BFGS", control=list(maxit=500, parscale=opt.res$par, REPORT=50),
+                   t=CF$t[ii], y=CF$Cor[ii], M=M, T=cf$T, parind=parind[ii,])
+
+  dof <- (length(CF$t[ii])-length(par))
+  Qval <- 1-pchisq(opt.res$value, dof)
+
+  opt.tsboot <- tsboot(tseries=cf$cf[,ii], statistic=fit.formatrixboot, R=boot.R, l=boot.l, sim="geom",
+                       par=opt.res$par, t=CF$t[ii], M=M, T=cf$T, parind=parind[ii,])
+
+  res <- list(CF=CF, M=M, parind=parind, ii=ii, opt.res=opt.res, opt.tsboot=opt.tsboot,
+              boot.R=boot.R, boot.l=boot.l, useCov=useCov, CovMatrix=CovMatrix,
+              Qval=Qval, chisqr=opt.res$value, dof=dof, mSize=mSize, cf=cf, t1=t1, t2=t2)
+  attr(res, "class") <- c("matrixfit", "list")
+  return(invisible(res))
+}
+
+plot.matrixfit <- function(mfit, ...) {
+  par <- mfit$opt.res$par
+  parind <-  mfit$parind
+  T <- mfit$cf$T
+  Thalfp1 <- T/2+1
+  plotwitherror(mfit$CF$t, mfit$CF$Cor, mfit$CF$Err, log="y", ...)
+  tx <- seq(mfit$t1, mfit$t2, 0.005)
+  for(i in c(1:mfit$mSize)) {
+    y <- 0.5*par[parind[(i-1)*Thalfp1+1,1]]*par[parind[(i-1)*Thalfp1+1,2]]*(exp(-par[3]*(T-tx)) + exp(-par[3]*tx))
+    col=c("red", "brown", "green", "blue")
+    lines(tx, y, col=col[i], lwd=c(3))
+  }
+}
+
+summary.matrixfit <- function(mfit) {
+  cat("\n ** Result **\n\n")
+  cat("based on", length(mfit$cf$cf[,1]), "measurements\n")
+  cat("m \t=\t", mfit$opt.res$par[3], "\n")
+  cat("dm\t=\t", sd(mfit$opt.tsboot$t[,3]), "\n")
+  cat("P_1 \t=\t", mfit$opt.res$par[1], "\n")
+  cat("dP_1\t=\t", sd(mfit$opt.tsboot$t[,1]), "\n\n")
+  cat("P_2 \t=\t", mfit$opt.res$par[2], "\n")
+  cat("dP_2\t=\t", sd(mfit$opt.tsboot$t[,2]), "\n\n")
+
+  cat("boot.R\t=\t", mfit$boot.R, "\n")
+  cat("boot.l\t=\t", mfit$boot.l, "\n")
+  cat("useCov\t=\t", mfit$useCov, "\n")
+  cat("chisqr\t=\t", mfit$opt.res$value, "\ndof\t=\t", mfit$dof, "\nchisqr/dof=\t",
+      mfit$opt.res$value/mfit$dof, "\n")
+  ## probability to find a larger chi^2 value
+  ## if the data is generated again with the same statistics
+  ## given the model is correct
+  cat("Quality of the fit (p-value):", mfit$Qval, "\n")
+
+}
+
+fit.formatrixboot <- function(cf, par, t, M, T, parind) {
+  opt.res <- optim(par, matrixChisqr, method="BFGS", control=list(maxit=500, parscale=par, REPORT=50),
+                   t=t, y=apply(cf,2,mean), M=M, T=T, parind=parind)
+  opt.res <- optim(opt.res$par, matrixChisqr, method="BFGS", control=list(maxit=500, parscale=opt.res$par, REPORT=50),
+                   t=t, y=apply(cf,2,mean), M=M, T=T, parind=parind)
+  return(c(opt.res$par, opt.res$value))
+}
diff --git a/R/readutils.R b/R/readutils.R
@@ -1,6 +1,6 @@
 readcmicor <- function(filename) {
   data <- read.table(filename, header=F,
-                     colClasse=c("integer","integer","integer","numeric","numeric","integer"))
+                     colClasses=c("integer","integer","integer","numeric","numeric","integer"))
   attr(data, "class") <- c("cmicor", "data.frame")  
   return(invisible(data))
 }
@@ -24,14 +24,16 @@ readcmidatafiles <- function(path="./", basename="onlinemeas", skip=1,
     if(verbose) {
       cat("Reading from file", ofiles[i], "\n")
     }
-    cmicor <- rbind(cmicor, read.table(paste(path, ofiles[i], sep=""), skip=skip))
+    cmicor <- rbind(cmicor, read.table(paste(path, ofiles[i], sep=""), skip=skip, header=F,
+                                       colClasses=c("integer", "integer","integer","numeric","numeric")))
     j <- j+1
   }
   attr(cmicor, "class") <- c("cmicor", "data.frame")  
   return(invisible(cmicor))
 }
 
-extract.obs <- function(cmicor, vec.obs=c(1), ind.vec=c(1,2,3,4,5), sym.vec, verbose=FALSE) {
+extract.obs <- function(cmicor, vec.obs=c(1), ind.vec=c(1,2,3,4,5),
+                        sym.vec, sign.vec, verbose=FALSE) {
   if(missing(cmicor)) {
     stop("data missing in extract.obs\n")
   }
@@ -54,25 +56,36 @@ extract.obs <- function(cmicor, vec.obs=c(1), ind.vec=c(1,2,3,4,5), sym.vec, ver
       data[(data[,ind.vec[3]]!=0 & (data[,ind.vec[3]]!=(Thalf-1))),ind.vec[c(4,5)]]/2
   ## symmetrise or anti-symmetrise for given observable?
   isym.vec <- rep(+1, times= nrObs*Thalf*nrStypes)
+  isign.vec <- rep(+1, times= nrObs*Thalf*nrStypes)
   if(!missing(sym.vec)) {
     if(length(sym.vec) != nrObs) {
       stop("sym.vec was given, but had not the correct length")
     }
     for(i in c(1:nrObs)) {
-      if(!sym.vec) {
+      if(!sym.vec[i]) {
         isym.vec[((i-1)*Thalf*nrStypes+1):((i)*Thalf*nrStypes)] <- -1
       }
     }
   }
-  cf <- array((data[,ind.vec[4]] + isym.vec*data[,ind.vec[5]]),
-              dim=c(nrObs*Thalf*nrStypes, length(data[,1])/(nrObs*Thalf*nrStypes)))
+  if(!missing(sign.vec)) {
+    if(length(sign.vec) != nrObs) {
+      stop("sign.vec was given, but does not have the correct length")
+    }
+    for(i in c(1:nrObs)) {
+      if(sign.vec[i] < 0) {
+        isign.vec[((i-1)*Thalf*nrStypes+1):((i)*Thalf*nrStypes)] <- -1
+      }
+    }
+  }
+  cf <- t(array(isign.vec*(data[,ind.vec[4]] + isym.vec*data[,ind.vec[5]]),
+              dim=c(nrObs*Thalf*nrStypes, length(data[,1])/(nrObs*Thalf*nrStypes))))
 
   return(invisible(list(cf=cf, Time=Time, nrStypes=nrStypes, nrObs=nrObs)))
 }
 
 readhlcor <- function(filename) {
   return(invisible(read.table(filename, header=F,
-                              colClasse=c("integer", "integer","integer","integer","numeric","numeric","numeric","numeric","integer"))))
+                              colClasses=c("integer", "integer","integer","integer","numeric","numeric","numeric","numeric","integer"))))
 }
 
 readoutputdata <- function(filename) {

diff --git a/man/matrixfit.Rd b/man/matrixfit.Rd
@@ -0,0 +1,64 @@
+\name{matrixfit}
+\title{Routine For A Factorising Matrix Fit}
+\description{
+  Performs a factorising fit on a correlation matrix
+}
+\usage{
+matrixfit(cf, boot.l=2, t1=13, t2=22, useCov=TRUE)
+}
+\arguments{
+  \item{cf}{
+    correlation matrix obtained with a call to \code{extrac.obs}.
+  }
+  \item{t1}{
+    lower bound for the fitrange in time (t1,t2). Counting starts with 0.
+  }
+  \item{t2}{
+    upper bound for the fitrange in time (t1,t2). Counting starts with 0.
+  }
+  \item{symmetrise}{
+    Symmetrise the matrix, not implemented yet.
+  }
+  \item{boot.R}{
+    number of bootstrap samples
+  }
+  \item{boot.l}{
+    block size for autocorrelation analysis
+  }
+  \item{noObs}{
+    number of gamma matrix combinations in the matrix. Currently only 1
+    is implemented
+  }
+  \item{matrix.size}{
+    matrix size, currently only 2 implemented
+  }
+  \item{useCov}{
+    use correlated chisquare
+  }
+}
+\value{
+
+}
+\details{
+  The inverse covariance matrix is computed using a singular value
+  decomposition. If the sample size N is too small, only sqrt(N)
+  eigenvalues of the matrix are kept exactly, while all others are
+  replaced by the mean of the rest. This helps to reduce instabilities
+  induced by too small eigenvalues of the covariance matrix.
+}
+\references{
+  C. Michael, \href{hep-lat/9412087}{hep-lat/9412087}
+}
+\seealso{
+
+}
+\examples{
+\dontrun{readcmidatafiles(basename="outpr", verbose=TRUE, skip=1,
+last.digits=4)}
+\dontrun{cf <- extract.obs(cmicor, verbose=T, vec.obs=c(1))}
+\dontrun{x <- matrixfit(cf, boot.l=2, t1=13, t2=22, useCov=TRUE)}
+}
+\author{Carsten Urbach, \email{curbach@gmx.de}}
+\keyword{optimize}
+\keyword{ts}
+