gpstuff-dev · avehtari · Nov 1, 2017
diff --git a/misc/psislw.m b/misc/psislw.m
@@ -1,31 +1,25 @@
-function [lw,kss] = psislw(lw,wcpp,wtrunc)
-%VGIS Pareto smoothed importance sampling
+function [lw,kss] = psislw(lw,Reff)
+%PSIS Pareto smoothed importance sampling
 %   
 %  Description
-%    [LW,K] = PSISLW(LW,WCPP,WCUTOFF) returns log weights LW
+%    [LW,K] = PSISLW(LW,Reff) returns log weights LW
 %    and Pareto tail indeces K, given log weights and optional arguments:
-%      WCPP    - percentage of samples used for generalise Pareto
-%                distribution (GPD) fit estimate (default = 20)
-%      WTRUNC  - parameter for truncating very large weights to N^WTRUNC,
-%                with no truncation if 0 (default = 3/4)
+%      Reff - relative MCMC efficiency N_eff/N
 %    
-%  Reference:
-%    Aki Vehtari and Andrew Gelman (2015). Pareto smoothed importance
-%    sampling. arXiv preprint arXiv:1507.02646.
+%  Reference
+%    Aki Vehtari, Andrew Gelman and Jonah Gabry (2017). Pareto
+%    smoothed importance sampling. https://arxiv.org/abs/1507.02646v5
 %
-% Copyright (c) 2015 Aki Vehtari
+% Copyright (c) 2015-2017 Aki Vehtari, Tuomas Sivula
 
 % This software is distributed under the GNU General Public
 % License (version 3 or later); please refer to the file
 % License.txt, included with the software, for details.
 if size(lw,1)<=1
-    error('vgislw: more than one log-weight needed');
+    error('psislw: more than one log-weight needed');
 end
 if nargin<2
-    wcpp=20;
-end
-if nargin<3
-    wtrunc=3/4;
+    Reff=1;
 end
 
 for i1=1:size(lw,2)
@@ -35,7 +29,8 @@
     x=x-max(x);
     % Divide log weights into body and right tail
     n=numel(x);
-    xcutoff=prctile(x,100-wcpp);
+    xs=sort(x);
+    xcutoff=xs(end-ceil(min(0.2*n,3*sqrt(n/Reff))));
     if xcutoff<log(realmin)
         % need to stay above realmin
         xcutoff=-700;
@@ -52,16 +47,23 @@
         [~,x2si]=sort(x2);
         % fit generalized Pareto distribution to the right tail samples
         [k,sigma]=gpdfitnew(exp(x2)-exp(xcutoff));
+    end
+    if k<1/3 || isinf(k)
+        % no smoothing if short tail or GPD fit failed
+        qx=x;
+    else
+        x1=x(x<=xcutoff);
+        x2=x(x>xcutoff);
+        n2=numel(x2);
+        [~,x2si]=sort(x2);
         % compute ordered statistic for the fit
         qq=gpinv(([1:n2]-0.5)/n2,k,sigma)+exp(xcutoff);
         % remap back to the original order
         slq=zeros(n2,1);slq(x2si)=log(qq);
         % join body and GPD smoothed tail
         qx=x;qx(x<=xcutoff)=x1;qx(x>xcutoff)=slq;
-    end
-    if wtrunc>0
-        % truncate too large weights
-        lwtrunc=wtrunc*log(n)-log(n)+sumlogs(qx);
+        % truncate smoothed values to the largest raw weight
+        lwtrunc=max(x);
         qx(qx>lwtrunc)=lwtrunc;
     end
     % renormalize weights
@@ -70,13 +72,24 @@
     lw(:,i1)=lwx;
     kss(1,i1)=k;
 end
-%ksi=find(kss>=0.5&kss<1);
-% if ~isempty(ksi)
-%     warning('Following indeces have estimated tail index 1/2>k>1');
-%     disp(ksi)
-% end
-% ksi=find(kss>=1);
-% if ~isempty(ksi)
-%     warning('Following indeces have estimated tail index k>1');
-%     disp(ksi)
-% end
+end
+
+function x = gpinv(p,k,sigma)
+% Octave compatibility by Tuomas Sivula
+    x = NaN(size(p));
+    if sigma <= 0
+        return
+    end
+    ok = (p>0) & (p<1);
+    if abs(k) < eps
+        x(ok) = -log1p(-p(ok));
+    else
+    x(ok) = expm1(-k * log1p(-p(ok))) ./ k;
+    end
+    x = sigma*x;
+    if ~all(ok)
+        x(p==0) = 0;
+        x(p==1 & k>=0) = Inf;
+        x(p==1 & k<0) = -sigma/k;
+    end
+end