-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathxap_mcmc_funs.py
416 lines (321 loc) · 12.9 KB
/
xap_mcmc_funs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
# Functions to support xap_mcmc.py
#
# Apertures:
# Sherpa User Model to compute source and background intensities from
# counts in source and background apertures.
#
# run_mcmc:
#
# Wrapper function for Sherpa fit and MCMC routines.
#
# write_draws:
#
# Function to output draws fits file.
#
# write_draws_mpdfs:
#
# Estimate mpdfs from draws, and use crates to save mpdfs in a fits file
###################################################################################################
class Apertures:
    """
    Sherpa user model computing model counts in source/background apertures.

    Model parameters are source and background intensities; data are raw
    counts in the apertures. The vector of model counts is computed from
    the vector of intensities by application of the ECF/Exposure matrix F
    (see eq. 7 and Table 1 of Primini & Kashyap, 2014, ApJ, 796, 24).

    Methods:
        __init__(self, F)
            store the ECF/Exposure matrix F
        __call__(self, params, iap)
            return the model counts vector F x params; iap is unused but
            kept for interface compatibility with existing callers
        attributes(self)
            print F and its length
    """
    def __init__(self, F):
        self.F = F

    def __call__(self, params, iap):
        import numpy as np
        # Matrix-vector product restricted to the first len(params) rows
        # and columns of F. This replaces the original O(n^2) Python
        # double loop and preserves its behavior when F is larger than
        # params (only the leading n x n sub-block was ever used).
        n = len(params)
        F = np.asarray(self.F, dtype=float)[:n, :n]
        return F.dot(params)

    def attributes(self):
        # Single-argument print() is valid in both Python 2 and Python 3.
        print('ECF/Exposure Matrix:')
        print(self.F)
        print('Length of F:')
        print(len(self.F))
###################################################################################################
def run_mcmc(F, C, s, sigma_s, ndraws, nburn, scale):
    """
    Wrapper for Sherpa commands to produce MCMC draws of source and
    background intensities.

    Inputs:
        F       - ECF/Exposure matrix relating the vector of model
                  intensities M to the counts vector C by C = F x M
        C       - raw aperture counts vector (last element is background)
        s       - MLE intensities vector, used to seed the fit
        sigma_s - errors on s
        ndraws  - number of draws
        nburn   - number of draws to skip at start of sampling
        scale   - initial scale for covariance matrix in get_draws

    Outputs:
        parnames, stats, accept, params - parameter names and vectors from
        get_draws with the first nburn samples eliminated (at least one
        draw is always returned).
    """
    import sherpa.astro.ui as shp
    import numpy as np
    # Create an instance of the Apertures class and load it as the user model
    apertures = Apertures(F)
    shp.load_user_model(apertures, "xap")
    # Make a dummy independent variable array to go with C, and load it and C
    # as the dataset
    ix = np.arange(len(C))
    shp.load_arrays(1, ix, C, shp.Data1D)
    # Define parameter names. The last element in C is always the background.
    parnames = ['Source_%d' % i for i in range(len(C) - 1)]
    parnames.append('Background')
    # Add user pars, using s, sigma_s to establish guesses and bounds.
    # Offset values of s by 5% to force re-fit for maximum likelihood.
    shp.add_user_pars("xap", parnames, 1.05 * s,
                      parmins=1.05 * s - 5.0 * sigma_s,
                      parmaxs=1.05 * s + 5.0 * sigma_s)
    # Look the model component up by name: Sherpa only injects the bare
    # identifier "xap" into the interactive (__main__) namespace, so
    # referencing it directly inside this module function raises NameError.
    xap = shp.get_model_component("xap")
    # Set model, statistic, and minimization method
    shp.set_model(xap)
    shp.set_stat("cash")
    shp.set_method("moncar")
    # Set a hard lower limit of 0 on all intensities to avoid negative values
    for par in xap.pars:
        par._hard_min = 0.0
    import logging
    logger = logging.getLogger("sherpa")
    # logger.setLevel(logging.ERROR)   # uncomment to silence Sherpa chatter
    # Run fit and covar to set bounds for get_draws
    shp.fit()
    shp.covar()
    # Check that covar returned a valid (finite, non-negative-diagonal) matrix
    cvmat = shp.get_covar_results().extra_output
    if np.any(np.isnan(cvmat.reshape(-1))) or np.any(np.diag(cvmat) < 0.0):
        print("WARNING: covar() returned an invalid covariance matrix. Attempting to use conf().")
        # Try conf() to set diagonal elements of the covariance matrix.
        # If that also fails, fall back to the MLE errors sigma_s.
        shp.conf()
        conf_err = np.array(shp.get_conf_results().parmaxes)
        if not np.all(conf_err > 0.0):
            print("WARNING: conf() returned invalid errors. Using MLE errors.")
            conf_err = sigma_s
        cvmat = np.diag(conf_err * conf_err)
        shp.get_covar_results().extra_output = cvmat
    shp.set_sampler_opt('scale', scale)
    stats, accept, intensities = shp.get_draws(niter=ndraws)
    # Return all but the first nburn values. If nburn >= ndraws,
    # return a single draw.
    nburn1 = min(nburn, ndraws - 1)
    return parnames, stats[nburn1:], accept[nburn1:], intensities[:, nburn1:]
###################################################################################################
def write_draws(filename, parnames, stats, accept, intensities):
    """
    Save MCMC draws to a FITS file using crates.

    Inputs:
        filename    - output draws file name
        parnames    - names of source/background parameters
        stats       - vector of fit statistics from get_draws
        accept      - vector of draw acceptance flags
        intensities - 2-d table of draws, one row per aperture
    """
    import pycrates as pc
    import crates_contrib.utils as pcu
    draws_tab = pc.TABLECrate()
    draws_tab.name = "Draws"
    pcu.add_colvals(draws_tab, 'Stat', stats)
    pcu.add_colvals(draws_tab, 'Accept', accept)
    # One column of draws per source/background parameter
    for ipar, pname in enumerate(parnames):
        pcu.add_colvals(draws_tab, pname, intensities[ipar])
    dataset = pc.CrateDataset()
    dataset.add_crate(draws_tab)
    dataset.write(filename, clobber=True)
    return
###################################################################################################
def write_draws_mpdfs(filename, parnames, intensities, ndrmesh):
    """
    Estimate marginal pdfs from MCMC draws and save them in a FITS file.

    For each parameter the draws are summarized in two ways, each written
    as its own table crate:
        1. a Gamma distribution with alpha, beta matched to the sample
           mean and variance of the draws;
        2. an Epanechnikov kernel density estimate with bandwidth
           sqrt(variance).

    Inputs:
        filename    - output draws mpdfs file name
        parnames    - names of source/background parameters
        intensities - 2-d table of draws, one row per aperture
        ndrmesh     - number of grid points in mpdfs
    """
    import numpy as np
    import pycrates as pc
    import crates_contrib.utils as pcu
    import xap_funs as xfuns
    import sys
    # HACK: user-specific path needed to find sklearn on the author's
    # laptop; harmless elsewhere but should be removed once sklearn is
    # available in the standard environment.
    sys.path.append("/Users/fap/anaconda/lib/python2.7/site-packages/")
    from sklearn.neighbors import KernelDensity
    cds = pc.CrateDataset()
    for i in range(len(parnames)):
        sdraws = intensities[i]
        # Compute Gamma Dist alpha, beta from mean and variance
        # of intensity values in draws
        smean = np.sum(sdraws) / len(sdraws)
        svar = np.sum(sdraws * sdraws) / len(sdraws) - smean * smean
        alpha = smean * smean / svar
        beta = smean / svar
        smin = max(min(sdraws), 1.0e-10)  # should never happen, but just in case, make sure sdraws>0
        smax = max(sdraws)
        ds = (smax - smin) / ndrmesh
        sgrid = np.arange(smin, smax, ds)
        # First Gamma Dist
        gps = xfuns.Gamma_Prior(alpha, beta)
        draws_mpdf = gps(sgrid)
        # Now do KDE
        vals_kde = KernelDensity(kernel='epanechnikov', bandwidth=np.sqrt(svar)).fit(sdraws[:, np.newaxis])
        log_dens = vals_kde.score_samples(sgrid[:, np.newaxis])
        epan_pdf = np.exp(log_dens)

        def _add_mpdf_crate(label, mpdf):
            # Build one table crate holding (Inten, MargPDF) plus the
            # summary keywords, and append it to the output dataset.
            # (Factored out: the original duplicated this stanza twice.)
            tab = pc.TABLECrate()
            tab.name = parnames[i] + "_" + label
            pcu.add_colvals(tab, 'Inten', sgrid)
            pcu.add_colvals(tab, 'MargPDF', mpdf)
            pc.set_key(tab, 'alpha', alpha)
            pc.set_key(tab, 'beta', beta)
            pc.set_key(tab, 'smean', smean)
            pc.set_key(tab, 'svar', svar)
            pc.set_key(tab, 'Stepsize', ds)
            cds.add_crate(tab)

        _add_mpdf_crate("Gamma_Dist", draws_mpdf)
        _add_mpdf_crate("Epan_KDE", epan_pdf)
    cds.write(filename, clobber=True)
    return
###################################################################################################
def modalpoint(f, CL):
    """
    Estimate mode and CL level bounds of a probability density function
    given a set of MCMC samples.
    Usage:
        mode, lobound, hibound, ULFlag, LLFlag, clreal = modalpoint(f, CL)
    Inputs:
        f   array of MCMC samples
        CL  desired confidence level
    Outputs:
        mode of distribution
        lower CL bound
        upper CL bound
        UL flag - True if array limit reached when searching for lower bound
        LL flag - True if array limit reached when searching for upper bound
        CL_real - Actual CL achieved
    Description:
        Samples are sorted and divided into two subsets, each covering half
        of the range. The subset with the larger number of points is kept,
        and the process is repeated until the larger subset has only two
        points, or until the number of points does not change. The average
        of the remaining samples is returned as the mode. The test for the
        number of points not changing is used to trap the case where a
        number of rejected draws appear after an accepted draw, and the
        parameter value is the same for all of them. In this case, the
        draws can be winnowed down to a single parameter value appearing
        many times, and the mode will not converge.
        Once the mode is estimated, the routine starts at the index in the
        sorted array of unique samples closest to but less than the mode,
        and adds values alternately above and below the starting point
        until CL% of the total number of samples is reached. If the search
        encounters 0, an upper-limit flag is set and the search continues
        above the mode only. If the search encounters the upper array
        bound, a lower-limit flag is set and the search continues below the
        mode only.
    """
    import numpy as np
    import copy
    # Sort a copy so the caller's array order is untouched.
    fcopy = copy.copy(f)
    fcopy.sort()
    npts = len(fcopy)
    if npts < 1:
        raise ValueError("Invalid Number of Samples")
    fmin = fcopy[0]
    fmax = fcopy[-1]
    npts_old = -1
    testit = True
    # Winnow toward the mode: repeatedly keep the half-range holding the
    # majority of the samples.
    while testit:
        midrange = 0.5 * (fmin + fmax)
        lorange = fcopy[fcopy < midrange]
        hirange = fcopy[fcopy >= midrange]
        # print fmin,fmax,len(lorange),len(hirange)
        # raw_input("PRESS ENTER TO CONTINUE")
        # Ties (equal halves) go to the lower half by the >= test.
        if(len(lorange) >= len(hirange)):
            fcopy = lorange
            fmax = midrange
        if(len(lorange) < len(hirange)):
            fcopy = hirange
            fmin = midrange
        npts_old = copy.copy(npts)
        npts = len(fcopy)
        # Stop when <= 2 points remain, or when winnowing stalls (all
        # remaining samples share one value — repeated rejected draws).
        testit = npts > 2 and npts != npts_old
    # Mode estimate: mean of the surviving samples.
    mode = sum(fcopy) / len(fcopy)
    # Now try to find bounds, starting at mode.
    # First, get unique draws values.
    fun = np.unique(f)
    if(mode > fun[-1]):
        msg = "Mode %f greater than highest draws value %f" % (mode, fun[-1])
        raise ValueError(msg)
    if(mode < fun[0]):
        msg = "Mode %f less than lowest draws value %f" % (mode, fun[0])
        raise ValueError(msg)
    # Find index closest to but less than mode
    istart = max(np.where(fun <= mode)[0])
    # print "Mode:\t%f" % mode
    # print "Start looking at value %f" % fun[istart]
    # raw_input("PRESS ENTER TO CONTINUE.")
    ULFlag = False
    LLFlag = False
    ilo = istart
    ihi = istart
    # Target count of samples inside the interval for the requested CL.
    ncl = int(CL * len(f))
    ncount = 0
    # Grow the interval alternately upward (ihi) and downward (ilo),
    # counting the original samples falling in [fun[ilo], fun[ihi]].
    while ncount < ncl:
        ihi = ihi + 1
        if ihi > (len(fun) - 1):
            # Hit the top of the array: clamp and flag as a lower limit.
            ihi = len(fun) - 1
            LLFlag = True
        ncount = len(np.where((f >= fun[ilo]) & (f <= fun[ihi]))[0])
        # print "%d values between %f and %f" % (ncount,fun[ilo],fun[ihi])
        if(ncount >= ncl):
            break
        ilo = ilo - 1
        if ilo < 0:
            # Hit the bottom of the array: clamp and flag as an upper limit.
            ilo = 0
            ULFlag = True
        ncount = len(np.where((f >= fun[ilo]) & (f <= fun[ihi]))[0])
        # print "%d values between %f and %f" % (ncount,fun[ilo],fun[ihi])
    hibound = fun[ihi]
    lobound = fun[ilo]
    if ULFlag:
        # Upper limit: report 0 as the lower bound.
        lobound = 0.0
    # Actual confidence level achieved by the returned interval.
    clreal = float(ncount) / len(f)
    return (mode, lobound, hibound, ULFlag, LLFlag, clreal)
###################################################################################################
def comp_pdfs(vals):
    """
    Estimate the pdf underlying a set of samples in two ways: an
    Epanechnikov kernel density estimate and a moment-matched fit to a
    Gamma distribution.
    NOTE: only works on the author's laptop, due to the sklearn
    installation path hard-coded below.
    Returns (vals_grid, epan_pdf, Gam_pdf).
    """
    import numpy as np
    import xap_funs as xfuns
    import sys
    sys.path.append("/Users/fap/anaconda/lib/python2.7/site-packages/")
    from sklearn.neighbors import KernelDensity
    nsamp = len(vals)
    mu = sum(vals) / nsamp
    sig = np.sqrt(sum(vals * vals) / nsamp - mu * mu)
    # Evaluation grid: +/- 5 sigma about the mean, clipped at zero
    grid = np.linspace(max(mu - 5.0 * sig, 0.0), mu + 5.0 * sig, 100)
    # sklearn KDE with Epanechnikov kernel, bandwidth set to sigma
    kde = KernelDensity(kernel='epanechnikov', bandwidth=sig).fit(vals[:, np.newaxis])
    epan_pdf = np.exp(kde.score_samples(grid[:, np.newaxis]))
    # Gamma distribution with alpha, beta matched to sample mean/variance
    a = (mu / sig) * (mu / sig)
    b = mu / (sig * sig)
    Gam_pdf = xfuns.Gamma_Prior(a, b)(grid)
    return grid, epan_pdf, Gam_pdf