hessian.m

function [hess,err] = hessian(fun,x0)
% hessian: estimate elements of the Hessian matrix (array of 2nd partials)
% usage: [hess,err] = hessian(fun,x0)
%
% Hessian is NOT a tool for frequent use on an expensive
% to evaluate objective function, especially in a large
% number of dimensions. Its computation will use roughly
% O(6*n^2) function evaluations for n parameters.
% 
% arguments: (input)
%  fun - SCALAR analytical function to differentiate.
%        fun must be a function of the vector or array x0.
%        fun does not need to be vectorized.
% 
%  x0  - vector location at which to compute the Hessian.
%
% arguments: (output)
%  hess - nxn symmetric array of second partial derivatives
%        of fun, evaluated at x0.
%
%  err - nxn array of error estimates corresponding to
%        each second partial derivative in hess.
%
%
% Example usage:
%  Rosenbrock function, minimized at [1,1]
%  rosen = @(x) (1-x(1)).^2 + 105*(x(2)-x(1).^2).^2;
%  
%  [h,err] = hessian(rosen,[1 1])
%  h =
%           842         -420
%          -420          210
%  err =
%    1.0662e-12   4.0061e-10
%    4.0061e-10   2.6654e-13
%
%
% Example usage:
%  cos(x-y), at (0,0)
%  Note: this hessian matrix will be positive semi-definite
%
%  hessian(@(xy) cos(xy(1)-xy(2)),[0 0])
%  ans =
%           -1            1
%            1           -1
%
%
% See also: derivest, gradient, gradest, hessdiag
%
%
% Author: John D'Errico
% e-mail: woodchips@rochester.rr.com
% Release: 1.0
% Release date: 2/10/2007

% parameters that we might allow to change
params.StepRatio = 2.0000001;
params.RombergTerms = 3;

% get the size of x0 so we can reshape
% later.
sx = size(x0);

% was a string supplied?
if ischar(fun)
  fun = str2func(fun);
end

% total number of derivatives we will need to take
nx = length(x0);

% get the diagonal elements of the hessian (2nd partial
% derivatives wrt each variable.)
[hess,err] = hessdiag(fun,x0);

% form the eventual hessian matrix, stuffing only
% the diagonals for now.
hess = diag(hess);
err = diag(err);
if nx<2
  % the hessian matrix is 1x1. all done
  return
end

% get the gradient vector. This is done only to decide
% on intelligent step sizes for the mixed partials
[grad,graderr,stepsize] = gradest(fun,x0);

% Get params.RombergTerms+1 estimates of the upper
% triangle of the hessian matrix
dfac = params.StepRatio.^(-(0:params.RombergTerms)');
for i = 2:nx
  for j = 1:(i-1)
    dij = zeros(params.RombergTerms+1,1);
    for k = 1:(params.RombergTerms+1)
      dij(k) = fun(x0 + swap2(zeros(sx),i, ...
        dfac(k)*stepsize(i),j,dfac(k)*stepsize(j))) + ...
        fun(x0 + swap2(zeros(sx),i, ...
        -dfac(k)*stepsize(i),j,-dfac(k)*stepsize(j))) - ...
        fun(x0 + swap2(zeros(sx),i, ...
        dfac(k)*stepsize(i),j,-dfac(k)*stepsize(j))) - ...
        fun(x0 + swap2(zeros(sx),i, ...
        -dfac(k)*stepsize(i),j,dfac(k)*stepsize(j)));
        
    end
    dij = dij/4/prod(stepsize([i,j]));
    dij = dij./(dfac.^2);
    
    % Romberg extrapolation step
    [hess(i,j),err(i,j)] =  rombextrap(params.StepRatio,dij,[2 4]);
    hess(j,i) = hess(i,j);
    err(j,i) = err(i,j);
  end
end


end % mainline function end

% =======================================
%      sub-functions
% =======================================
function vec = swap2(vec,ind1,val1,ind2,val2)
% swaps val as element ind, into the vector vec
vec(ind1) = val1;
vec(ind2) = val2;

end % sub-function end


% ============================================
% subfunction - romberg extrapolation
% ============================================
function [der_romb,errest] = rombextrap(StepRatio,der_init,rombexpon)
% do romberg extrapolation for each estimate
%
%  StepRatio - Ratio decrease in step
%  der_init - initial derivative estimates
%  rombexpon - higher order terms to cancel using the romberg step
%
%  der_romb - derivative estimates returned
%  errest - error estimates
%  amp - noise amplification factor due to the romberg step

srinv = 1/StepRatio;

% do nothing if no romberg terms
nexpon = length(rombexpon);
rmat = ones(nexpon+2,nexpon+1);
switch nexpon
  case 0
    % rmat is simple: ones(2,1)
  case 1
    % only one romberg term
    rmat(2,2) = srinv^rombexpon;
    rmat(3,2) = srinv^(2*rombexpon);
  case 2
    % two romberg terms
    rmat(2,2:3) = srinv.^rombexpon;
    rmat(3,2:3) = srinv.^(2*rombexpon);
    rmat(4,2:3) = srinv.^(3*rombexpon);
  case 3
    % three romberg terms
    rmat(2,2:4) = srinv.^rombexpon;
    rmat(3,2:4) = srinv.^(2*rombexpon);
    rmat(4,2:4) = srinv.^(3*rombexpon);
    rmat(5,2:4) = srinv.^(4*rombexpon);
end

% qr factorization used for the extrapolation as well
% as the uncertainty estimates
[qromb,rromb] = qr(rmat,0);

% the noise amplification is further amplified by the Romberg step.
% amp = cond(rromb);

% this does the extrapolation to a zero step size.
ne = length(der_init);
rombcoefs = rromb\(qromb'*der_init);
der_romb = rombcoefs(1,:)';

% uncertainty estimate of derivative prediction
s = sqrt(sum((der_init - rmat*rombcoefs).^2,1));
rinv = rromb\eye(nexpon+1);
cov1 = sum(rinv.^2,2); % 1 spare dof
errest = s'*12.7062047361747*sqrt(cov1(1));

end % rombextrap