-
Notifications
You must be signed in to change notification settings - Fork 0
/
UK_predict.m
234 lines (210 loc) · 6.84 KB
/
UK_predict.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
function output=UK_predict(uk,pstd,bayes)
% function output=UK_predict(uk,pstd,bayes)
%
% Models SSTs from UK'37 using the BAYSPLINE calibration.
% Please cite the source publication when using this calibration:
%
% Tierney, JE and Tingley, MP (2018). BAYSPLINE: A new calibration for the
% alkenone paleothermometer. Paleoceanography and Paleoclimatology, 33.
% https://doi.org/10.1002/2017PA003201
%
% ----- Inputs -----
% uk = uk37' values of length N
%
% pstd = prior standard deviation (scalar). Recommended value: 10. This
% conservative value that works for most applications.
% At high UK, a more restrictive value of 5 may be desirable to
% assign a low probability to SSTs > 35C (Unlikely given that UK approaches
% 1 near 30C).
%
% bayes: (optional - mainly for use with the DASH interface) A Bayesian
% posterior to use for the calibration. if empty, use the default posterior
% file.
%
% ----- Output -----
% output: a structure containing the following:
% output.prior_mean = Prior mean value, taken from the UK timeseries
% converted to SST with the Prahl '88 culture equation.
%
% output.prior_std = Prior standard deviation (user set).
%
% output.jump_dist = standard deviation of the jumping distribution.
% Values are chosen to achieve a acceptance rate of ca. 0.44
% (Gelman, 2003).
%
% output.rhat = Rhat statistic to assess convergence. Should be close to 1.
%
% output.SST = 3 x N vector of inferred SSTs, includes 2.5% level, 50%
% level (median values), and 97.5% level.
%
% output.ens = full ensemble (N = 1000) of posterior SSTs.
%
% ----- Plots -----
% This program generates a couple of sanity check plots:
% 1) Prior distribution vs posterior distribution. Prior should be wider
% unless a strong control is warranted.
%
% 2) Time series converted to SST values, with 1-sigma confidence intervals.
%
% For a full explanation of the approach see Tierney & Tingley (2018).
%% load model parameters
% process optional call to a different posterior file
% load posteriors for B coefficients and tau^2 variance
ng=nargin;
if ng == 3
bayesParams=load(bayes);
else
bayesParams=load('bayes_posterior_v2.mat');
end
%thin the posterior draws a bit
bdraws=bayesParams.bdraws(1:2:end,:);
tau2=bayesParams.tau2(1:2:end);
%confirm UK obs are column vector
uk=uk(:);
N_Ts=length(uk);
N_p=length(tau2);
%number of samples
N=500;
%burnin to discard
burnin=250;
%set priors. Use Prahl 88 to create a prior mean vector
prior_mean=(uk-.039)./.034;
%prior variance comes from user-defined pstd
prior_var=pstd^2*ones(N_Ts,1);
%save priors to output
output.prior_mean=prior_mean;
output.prior_std=pstd;
%set the initial SST values to prior mean
init=prior_mean;
%create empty matrices of the correct size
MH_samps_t=NaN(N_Ts,N_p,N-burnin);
accepts_t=NaN(N_Ts,N_p,N-burnin);
%make a spline with set knots
order=3; %spline order
kn = augknt(bayesParams.knots,order); %knots
%assign width of jumping distribution based on average UK temp to obtain
%40-50% accept rate
pmm=median(prior_mean);
if pmm<20
JW=3.5;
elseif pmm>=20 && pmm <= 23.7
JW=3.7;
else
JW=pmm*0.8092-15.1405;
end
output.jump_dist=JW;
%% MH loop
tic
parfor jj=1:N_p
accepts = NaN(N_Ts,N);
samps = NaN(N_Ts,N);
b_now=bdraws(jj,:);
tau_now=tau2(jj);
%use spmak to put together the b-spline
bs_b=spmak(kn,b_now);
%extrapolate function
bs=fnxtr(bs_b);
%intialize at starting value
samps(:,1)=init;
s_now=samps(:,1);
%evaluate mean UK value at current SST
mean_now=fnval(bs,s_now);
%evaluate likelihood
LL_now=normpdf(uk,mean_now,sqrt(tau_now));
%evaluate prior
pr_now=normpdf(s_now,prior_mean,sqrt(prior_var));
%multiply to get initial proposal S0
S0_now=LL_now.*pr_now;
for kk=2:1:N
%generate proposal using normal jumping distr.
s_prop=normrnd(s_now,JW);
%evaluate mean value at current SST
mean_now=fnval(bs,s_prop);
%evaluate likelihood
LL_now=normpdf(uk,mean_now,sqrt(tau_now));
%evaluate prior
pr_now=normpdf(s_prop,prior_mean,sqrt(prior_var));
%multiply to get proposal S0_p
S0_p=LL_now.*pr_now;
%calculate MH ratio
MH_rat=S0_p./S0_now;
success_rate=min(1, MH_rat);
%make the draw:
draw=rand(N_Ts,1);
B = draw <= success_rate;
s_now(B)=s_prop(B);
S0_now(B)=S0_p(B);
accepts(B,kk)=1;
samps(:,kk)=s_now;
end
MH_samps_t(:,jj,:)=samps(:,burnin+1:end);
accepts_t(:,jj,:)=accepts(:,burnin+1:end);
end
toc
%now let's calculate the Rhat statistic to assess convergence.
rhats=NaN(size(MH_samps_t,1),1);
for i=1:size(MH_samps_t,1)
[rhats(i),~]=ChainConvergence(squeeze(MH_samps_t(i,:,:)), N_p);
end
output.rhat=median(rhats);
%reshape
MH_c=reshape(MH_samps_t,N_Ts,N_p*(N-burnin));
%calculate acceptance
output.accepts = nansum(accepts_t(:))./(N_Ts*N_p*(N-burnin));
%save subsample of ensemble
output.ens=MH_c(:,1:125:end);
%sort and assign to output
MH_s=sort(MH_c,2);
pers3=round([.025 .50 .975].*size(MH_c,2));
output.SST=MH_s(:,pers3);
%%
%plot prior vs post
f1=figure(1); clf;
set(f1,'pos',[50 700 400 400]);
xt=[0:.1:40]';
prior=normpdf(xt,prior_mean(1),sqrt(prior_var(1)));
post=ksdensity(output.ens(:),xt);
pr=plot(xt,prior,'k--'); hold on;
pt=plot(xt,post,'b-');
legend([pr pt],'Prior','Posterior');
legend('boxoff');
%%
%plot timeseries with 95% CI
f2=figure(2); clf;
set(f2,'pos',[550 700 500 400]);
plot(output.SST(:,2),'color','k','linewidth',2);
hold on;
plot(output.SST(:,3),'color',[.6 .6 .6],'linewidth',1);
hold on;
plot(output.SST(:,1),'color',[.6 .6 .6],'linewidth',1);
%% subfunction: ChainConvergence utility
function [Rhat, Neff]=ChainConvergence(chains, M)
%
% function [Rhat, Neff]=ChainConvergence(chains, M)
%
% calculate the R-hat statistic for
% monitoring convergence of multiple parallel MCMC runs. Also outputs
% n_eff.
% chains: a matrix of MCMC chains, each of the same length.
% M: the number of different chains - must be one of the dimensions of
% chains.
%
if ismember(M, size(chains))==0
disp('Error: Second input must be the number of chains, so one of the dimensions of the first input.')
return
elseif length(chains(:,1))==M
chains=chains';
end
% each column is a chain.
Nc=length(chains(:,1));
%quantities from Gelman:
psi_bar_dot_j = mean(chains);
psi_bar_dot_dot=mean(psi_bar_dot_j);
Bp = (Nc/(M-1))*sum((psi_bar_dot_j-psi_bar_dot_dot).^2);
s2_j= (1/(Nc-1))*sum((chains-kron(psi_bar_dot_j, ones(Nc,1))).^2);
W=(1/M)*sum(s2_j);
var_hat_pos=((Nc-1)/Nc)*W + (1/Nc)*Bp;
Rhat=sqrt(var_hat_pos/W);
Neff=M*Nc*min(var_hat_pos/Bp, 1);
end
end