k_oisvm_train.m

function model = k_oisvm_train(X,Y,model)
% K_OISVM_TRAIN Kernel Online Independent SVM algorithm
%
%    MODEL = K_OISVM_TRAIN(X,Y,MODEL) trains an classifier according to the
%    Online Independent SVM algorithm, using kernels.
%
%    Additional parameters:
%    - model.C is the weight of the error, used to reduce the amount of
%      regularization.
%      Default value is 1.
%    - model.eta is the sparseness parameter, used to trade-off the
%      performance for sparseness of the classifier.
%      Default value is 0.1.
%
%   References:
%     - Orabona, F., Castellini, C., Caputo, B., Jie, L., & Sandini, G. (2010).
%       Online Independent Support Vector Machines.
%       Pattern Recognition 43(4), (pp. 1402-1412).

%    This file is part of the DOGMA library for MATLAB.
%    Copyright (C) 2009-2011, Francesco Orabona
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.
%
%    Contact the author: francesco [at] orabona.com

n = length(Y);   % number of training samples

if isfield(model,'iter')==0
    model.iter=0;
    model.beta=[];
    model.errTot=0;
    model.numSV=zeros(numel(Y),1);
    model.aer=zeros(numel(Y),1);
    model.pred=zeros(numel(Y),1);

    model.num_ker_eval=0;
    model.X=[];
    model.Y=[];
    model.K=zeros(100,100);
    model.x=0;
    model.sv=[];
    model.ss=0;
    model.KbInv=[];
end

if isfield(model,'C')==0
    model.C=1;
end

if isfield(model,'eta')==0
    model.eta=.1;
end

C=model.C;
ker=model.ker;
kerparam=model.kerparam;

if ~isfield(model,'maxiter'),  model.maxiter  = 20;  end
if ~isfield(model,'rows_mem'), model.rows_mem = 100; end
if ~isfield(model,'cols_mem'), model.cols_mem = 100; end
  
maxRow=size(model.K,1);
maxCol=size(model.K,2);

for curr=1:n
        
    model.iter=model.iter+1;
    dimS=length(model.S);
    
    model.X=[model.X , X(:,curr)];
    model.Y=[model.Y , Y(curr)];
    ultimo=size(model.X,2);
    
    last_col=[Y(curr); feval(ker,model.X,model.S,ultimo,kerparam) * Y(curr)];
    model.num_ker_eval=model.num_ker_eval+numel(model.S);
    model.K(1:dimS+1,ultimo)=last_col;
    if ultimo==maxCol
        tmp=zeros([size(model.K,1) size(model.K,2)+model.cols_mem]);
        tmp(1:dimS+1,1:ultimo)=model.K(1:dimS+1,1:ultimo);
        %tmp(:,1:curr)=K(:,1:curr); %is it faster copying even the zeros?
        model.K=tmp;
        maxCol=size(model.K,2);
    end
    
    if dimS>0
        colonna2=model.K(2:dimS+1,ultimo)*Y(curr);
        amin=model.KbInv*colonna2;
        delta=feval(ker,X,curr,curr,kerparam)-colonna2'*amin;
        model.num_ker_eval=model.num_ker_eval+1;
        
        if (delta>model.eta)
            model.KbInv=[model.KbInv, zeros(dimS,1);zeros(1,dimS+1)];
            model.KbInv=model.KbInv+[amin; -1]*[amin; -1]'/delta;

            model.S = [model.S ultimo];
            riga=feval(ker,model.X,ultimo,1:ultimo,kerparam).*model.Y(1:ultimo);
            model.num_ker_eval=model.num_ker_eval+ultimo;
            model.K(dimS+2,1:ultimo)=riga;
            
            % update Cholesky decomposition of the hessian
            d  = length(model.S);
            h = [0; model.K(d+1,model.S)'.*model.Y(model.S)'] + C * model.K(1:d+1,model.sv) * model.K(d+1,model.sv)';
            h2 = h(end,:);
            h2 = h2 + 1e-10*h2*eye(size(h,2)); % Ridge is only for numerical reason
            h3 = model.hess' \ h(1:d,:);
            h4 = sqrt(h2-h3'*h3);
            model.hess = [[model.hess h3]; [zeros(1,d) h4]];
            
            model.ss=[model.ss;sum(riga(model.sv))];
            if dimS+2==maxRow
                tmp=zeros([size(model.K,1)+model.rows_mem size(model.K,2)]);
                tmp(1:size(model.K,1),1:ultimo)=model.K(1:size(model.K,1),1:ultimo);
                model.K=tmp;
                maxRow=size(model.K,1);
            end
        end
    else
        model.S = ultimo;
        riga=feval(ker,model.X,ultimo,1:ultimo,kerparam).*model.Y(1:ultimo)';
        model.num_ker_eval=model.num_ker_eval+ultimo;
        model.K(2,1:ultimo)=riga;
        % update Cholesky decomposition of the hessian
        model.hess=[1e-5,0;0,sqrt(model.K(2,1)*Y(1))];
        model.ss=[model.ss;sum(riga(model.sv))];
        model.KbInv=(model.K(2,1)*Y(1))^-1;
    end
            
    outNew = model.K(1:length(model.x),ultimo)'*model.x;
    
    model.errTot=model.errTot+(sign(outNew)<=0);
    model.aer(model.iter)=model.errTot/model.iter;

    model.pred(model.iter)=outNew*Y(curr);
    
    if outNew<1
      K2 = model.K(1:size(model.hess,1),1:ultimo);
      
      model.hess = cholupdate(model.hess,sqrt(C)*K2(:,ultimo),'+');
      model.ss = model.ss+K2(:,ultimo);
      
      iter = 0;

      model.sv = [model.sv,ultimo];
      sv_bool = zeros(1,ultimo);
      sv_bool(model.sv) = 1;

      while iter < model.maxiter
        iter = iter + 1;
        % Take a few Newton step (no line search). By writing out the
        % equations, this simplifies to following equation:
        model.x = C*(model.hess \ (model.hess' \ model.ss));
        
        out = model.x'*K2;      % Recompute the outputs...

        new_sv_bool = (out<1);
        new_sv = find(new_sv_bool);

        % The set of errors has changed (and so the Hessian), so we update
        % the Cholesky decomposition of the Hessian.
        change=0;
        for i=find(new_sv_bool>sv_bool)
          model.hess = cholupdate(model.hess,sqrt(C)*K2(:,i),'+');
          model.ss = model.ss+K2(:,i);
          change=1;
        end
        for i=find(sv_bool>new_sv_bool)
          model.hess = cholupdate(model.hess,sqrt(C)*K2(:,i),'-');
          model.ss = model.ss-K2(:,i);
          change=1;
        end

        % Compute the objective function (it is not needed by the algorithm)
        % obj = 0.5* (norm(hess*x)^2 - 2*C*sum(out(new_sv)) + C*length(new_sv));
        % fprintf(['\rNb basis = %d (%d), iter Newton = %d, Obj = %.2f, ' ...
        %     'Nb errors = %d   '],length(hess)-1,length(find(x))-1,iter,obj,length(sv));
        
        if change==0
            break;
        end
        model.sv = new_sv;
        sv_bool=new_sv_bool;
      end
    end
    
    model.numSV(model.iter)=numel(model.S);
    
    if (mod(curr,model.step)==0)
        fprintf('#%.0f SV:%5.2f(%d)\tAER:%5.2f\n', ...
        ceil(curr/1000),numel(model.S)/curr*100,numel(model.S),model.aer(model.iter)*100);
    end
end

model.beta = [model.x(2:end); zeros(numel(model.S)-(numel(model.x)-1),1)]';
model.b = model.x(1);