/
utilities.py
262 lines (205 loc) · 7.78 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
"""
Provide several support functions for the deboosting such as
comparison of matrices, data conversions, percentile point calculation and
output formatting.
"""
import os
import numpy as np
from scipy import integrate
from scipy import interpolate
import matplotlib.pyplot as plt
import ConfigParser
import json
import pyfits
import ast
def iniparse(filename, flat=True):
    r"""Read an ini file into a dictionary of parsed values.

    Every value is passed through ``ast.literal_eval`` so that numbers,
    lists, tuples, dictionaries, booleans and quoted strings come back as
    Python objects. If a value cannot be evaluated as a literal, the raw
    string from the file is used instead.

    Parameters
    ----------
    filename : path of the ini file handed to ``RawConfigParser.read``
    flat : if True (default) the keys of all sections are merged into one
        dictionary, so a key repeated in a later section overwrites the
        earlier one; if False the result is nested as
        ``params[section][key]``.

    Returns
    -------
    dict of the parsed parameters
    """
    config = ConfigParser.RawConfigParser()
    config.read(filename)

    params = {}
    for section in config.sections():
        # in flat mode everything lands in the top-level dictionary
        target = params if flat else params.setdefault(section, {})

        for key, value in config.items(section):
            try:
                target[key] = ast.literal_eval(value)
            except (SyntaxError, ValueError, TypeError):
                # TypeError covers literals that parse but cannot be built,
                # e.g. a dictionary with an unhashable key: "{[1]: 2}"
                target[key] = value

    return params
def load_fits_primary(filename, transpose=True):
    """Load the primary HDU of a fits file as a numpy array using pyfits.

    Parameters
    ----------
    filename : path of the fits file to open
    transpose : if True (default) the array is transposed before it is
        returned

    Returns
    -------
    numpy array holding a copy of the primary HDU data
    """
    hdulist = pyfits.open(filename)
    try:
        # np.array copies the data, so the result stays valid once the file
        # has been closed
        output = np.array(hdulist[0].data)
    finally:
        # always release the file handle, even if reading the data failed
        hdulist.close()

    if transpose:
        output = output.transpose()

    return output
def compare_matrices_fits(matrix1, matrix2, filename, fractional=False,
                          replacenan=0.):
    """Write a multi-extension fits file for comparing two matrices in ds9:
    ds9 -multiframe filename.fits

    The primary HDU holds the difference ``matrix1 - matrix2`` (divided by
    ``matrix1`` when `fractional` is set); the two inputs follow as image
    extensions. NaN entries of the difference are set to `replacenan`.
    An existing file of the same name is removed first.

    Returns the difference array that was written.
    """
    err = matrix1 - matrix2
    if fractional:
        err = err / matrix1

    nan_mask = np.isnan(err)
    err[nan_mask] = replacenan

    # difference first, then the two inputs for reference
    hdus = [pyfits.PrimaryHDU(err),
            pyfits.ImageHDU(matrix1),
            pyfits.ImageHDU(matrix2)]
    thdulist = pyfits.HDUList(hdus)

    try:
        os.remove(filename)
    except OSError:
        print("writing new fits file: " + filename)
    else:
        print("overwriting existing fits file: " + filename)

    thdulist.writeto(filename)

    return err
def compare_matrices(matrix1, matrix2, logplot=False):
    """Show two matrices side by side as images, each with a colorbar.

    `matrix1` is drawn on the left with a horizontal colorbar inset,
    `matrix2` on the right with a vertical colorbar placed just outside the
    axes. With ``logplot`` equal to True, log10 of each matrix is shown.
    The call ends with ``plt.show()``.
    """
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    # only a value that compares equal to True switches on the log scaling
    use_log = (logplot == True)  # noqa: E712

    def displayed(matrix):
        """apply the optional log10 scaling"""
        if use_log:
            return np.log10(matrix)
        return matrix

    fig = plt.figure(1, [6, 3])

    # left panel: horizontal colorbar inside the axes
    left_panel = fig.add_subplot(121)
    left_cax = inset_axes(left_panel, width="50%", height="5%", loc=1)
    left_image = left_panel.imshow(displayed(matrix1))
    plt.colorbar(left_image, cax=left_cax, orientation="horizontal",
                 ticks=[1, 2, 3])
    left_cax.xaxis.set_ticks_position("bottom")

    # right panel: vertical colorbar anchored outside the axes. Placement of
    # the inset works like legend placement; borderpad and bbox_to_anchor
    # are the values to adjust.
    right_panel = fig.add_subplot(122)
    right_cax = inset_axes(right_panel,
                           width="5%",
                           height="50%",
                           loc=3,
                           bbox_to_anchor=(1.05, 0., 1, 1),
                           bbox_transform=right_panel.transAxes,
                           borderpad=0)
    right_image = right_panel.imshow(displayed(matrix2))
    plt.colorbar(right_image, cax=right_cax, ticks=[1, 2, 3])

    plt.draw()
    plt.show()
# TODO make this a more rigorous or well-behaved CDF; currently cheap
def percentile_points(axis, pdf, percentiles):
    """Coarse way to find percentile points of a distribution.

    The CDF is the cumulative sum of `pdf` divided by its total; for each
    requested percentile the smallest `axis` value whose CDF is at or above
    that percentile is reported.

    Parameters
    ----------
    axis : array-like of positions at which `pdf` is sampled
    pdf : array-like of (not necessarily normalized) density values, same
        shape as `axis`
    percentiles : sequence of CDF levels, as fractions (e.g. 0.16, 0.5)

    Returns
    -------
    numpy float array with one entry per percentile. An entry is -1 when
    the PDF is invalid (its total is zero or not finite) or when the CDF
    never reaches the requested level.
    """
    axis = np.asarray(axis)
    pdf = np.asarray(pdf)

    # start from the failure sentinel; valid entries are overwritten below
    cdf_points = np.zeros(len(percentiles), dtype=float)
    cdf_points.fill(-1.)

    # the CDF does not depend on the percentile, so build it only once
    cdf = None
    try:
        total = float(np.sum(pdf))
        if np.isfinite(total) and total != 0.:
            cdf = np.cumsum(pdf) / total
    except FloatingPointError:
        # only reachable when numpy is configured to raise on FP errors
        cdf = None

    if cdf is None:
        print("percentile_points: invalid PDF")
        return cdf_points

    for index, percentile in enumerate(percentiles):
        reached = axis[cdf >= percentile]
        # an empty selection means the CDF never gets to this level
        if reached.size > 0:
            cdf_points[index] = np.nanmin(reached)

    return cdf_points
# TODO: test this more
def fraction_exceeds(vector, threshold):
    """Find the fraction of the entries of vector that exceed a threshold.

    Parameters
    ----------
    vector : array-like of values; every element is counted, whatever the
        number of dimensions
    threshold : value the entries are compared against (strictly greater)

    Returns
    -------
    float between 0 and 1

    Raises
    ------
    ZeroDivisionError if `vector` is empty
    """
    values = np.asarray(vector)
    n_exceeding = np.count_nonzero(values > threshold)
    # divide by the total element count (not len(), which is only the first
    # dimension) so the fraction is also right for N-d input
    return float(n_exceeding) / float(values.size)
def prob_exceed(axis, probability, threshold):
    """Given x and P(x), find P(x>t).

    Both the part above the threshold and the normalization are integrated
    with Simpson's rule over the sample points; only samples with
    ``axis > threshold`` contribute to the numerator.

    Parameters
    ----------
    axis : array-like of x values at which the density is sampled
    probability : array-like of (not necessarily normalized) P(x) values
    threshold : the value t

    Returns
    -------
    float: the exceedance probability, 0 if fewer than two samples lie above
    the threshold, or -1 if the PDF is invalid (its integral is zero or not
    finite).
    """
    axis = np.asarray(axis)
    probability = np.asarray(probability)

    # scipy renamed simps to simpson and later removed the old name
    simpson = getattr(integrate, "simpson", None) or integrate.simps

    valid = False
    retval = -1.
    try:
        norm = float(simpson(probability, x=axis))
        if np.isfinite(norm) and norm != 0.:
            valid = True
            exceeding = axis > threshold
            if np.count_nonzero(exceeding) < 2:
                # nothing to integrate over above the threshold
                retval = 0.
            else:
                integral = simpson(probability[exceeding],
                                   x=axis[exceeding])
                retval = float(integral) / norm
    except FloatingPointError:
        # only reachable when numpy is configured to raise on FP errors
        valid = False

    if not valid:
        print("prob_exceed: invalid PDF")
        retval = -1.

    return retval
# TODO: remove trailing space
def fancy_vector(vector, format_string):
    """Format every entry of a vector with format_string.

    Each formatted entry is followed by a single space (including the last
    one); the pieces are concatenated into one string.
    """
    entry_format = format_string + " "
    return "".join(entry_format % entry for entry in vector)
def pm_vector(vector):
    """Turn three ordered points (e.g. the 16th, 50th and 84th percentiles)
    into a tuple of:
    central value, upper error (plus), lower error (minus)
    """
    lower, central, upper = vector[0], vector[1], vector[2]
    plus_error = upper - central
    minus_error = central - lower
    return (central, plus_error, minus_error)
def pm_error(vector, format_string):
    """Format a three-entry vector as "value +upper -lower".

    The vector is converted with pm_vector and each of the three numbers is
    formatted with the same format_string.
    """
    template = "{0} +{0} -{0}".format(format_string)
    central, plus_error, minus_error = pm_vector(vector)
    return template % (central, plus_error, minus_error)
def loginterpolate(x_vector, y_vector, xout, replace_nan=np.nan):
    """Do log10-linear interpolation
    (out of range values are _not_ extrapolated)

    The interpolation is linear in log10(y) against log10(x).

    Parameters
    ----------
    x_vector, y_vector : sample points and values
    xout : scalar or array-like of points at which to interpolate
    replace_nan : value substituted for NaN entries of the result, which
        includes every out-of-bounds point; the default (NaN) leaves them
        as NaN

    Returns
    -------
    numpy float array with the shape of `xout` (zero-dimensional for a
    scalar `xout`)
    """
    interpolant = interpolate.interp1d(np.log10(x_vector), np.log10(y_vector),
                                       bounds_error=False,
                                       fill_value=np.nan)

    yout = np.asarray(10. ** interpolant(np.log10(xout)), dtype=float)

    # NaN never compares equal to itself, so it has to be detected with
    # isnan; anything that is not a number counts as a real replacement
    try:
        keep_nan = bool(np.isnan(replace_nan))
    except TypeError:
        keep_nan = False

    if not keep_nan:
        # np.where also works for a zero-dimensional result (scalar xout),
        # where item assignment on a numpy scalar would fail
        yout = np.where(np.isnan(yout), replace_nan, yout)

    return yout
def cov_to_corr(matrix):
    """Normalize a covariance matrix into a correlation matrix.

    Every element (i, j) is divided by sqrt(C_ii) * sqrt(C_jj).
    """
    sigma = np.sqrt(np.diag(matrix))
    normalization = np.outer(sigma, sigma)
    return matrix / normalization
def spline_derivative(x_vector, y_vector):
    """Differentiate y(x) through a cubic spline fit.

    A k=3 spline representation of the data is built and its first
    derivative is evaluated at the input x values.
    """
    spline = interpolate.splrep(x_vector, y_vector, k=3)

    # spalde gives all derivatives at each point, one row per x value
    all_derivatives = np.array(interpolate.spalde(x_vector, spline))

    # column 0 is the spline value itself, column 1 the first derivative
    return all_derivatives[:, 1]
def store_json(data, filename):
    """Serialize a structure to JSON and write it to filename.

    Items are separated by a comma, a space and a newline, so each item
    starts on its own line in the output.
    """
    serialized = json.dumps(data, separators=(', \n', ': '))
    with open(filename, 'w') as fileout:
        fileout.write(serialized)
# TODO: rewrite with integrate.simps(s ** moment, dnds); allowing moments
def dnds_to_ngts(s_vector, dnds):
    """Integrate the differential counts into cumulative counts N(>S).

    Entry i of the result is the sum of dnds[i:] times the bin width, where
    the bin width is taken from the first two entries of s_vector
    (uniform, linear spacing is assumed).
    """
    bin_width = s_vector[1] - s_vector[0]

    # accumulate from the bright end downwards, then restore the S ordering
    summed_from_top = np.cumsum(dnds[::-1])
    return summed_from_top[::-1] * bin_width
def numpy_recarray_to_dict(data, keyfield_name):
    """Convert a numpy structured array to a dictionary
    (easy to write as JSON) eg:
    catalog_dict = utils.numpy_recarray_to_dict(augmented_catalog,
                                                gp['keyfield_name'])
    utils.store_json(catalog_dict, "catalog_human.dat")

    Parameters
    ----------
    data : one-dimensional numpy structured array
    keyfield_name : name of the field whose value identifies each record

    Returns
    -------
    dict mapping the key field value of every record to a dictionary of all
    of that record's fields (the key field included). Keys and values are
    converted to native Python objects with ``tolist()``.
    """
    # dtype.names preserves the field order and skips any title aliases
    names = data.dtype.names
    output_dict = {}

    for dataindex in range(data.size):
        record = data[dataindex]

        # TODO: remove keyfield_name from this list
        record_entry = {name: record[name].tolist() for name in names}

        # use a native Python key: json cannot serialize numpy scalar keys
        output_dict[record[keyfield_name].tolist()] = record_entry

    return output_dict