This repository has been archived by the owner on May 12, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 112
/
metrics.py
475 lines (346 loc) · 16.8 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
'''
Classes:
Metric - Abstract Base Class from which all metrics must inherit.
'''
from abc import ABCMeta, abstractmethod
import ocw.utils as utils
import numpy
import numpy.ma as ma
from scipy.stats import mstats
class Metric(object):
'''Base Metric Class'''
__metaclass__ = ABCMeta
class UnaryMetric(Metric):
'''Abstract Base Class from which all unary metrics inherit.'''
__metaclass__ = ABCMeta
@abstractmethod
def run(self, target_dataset):
'''Run the metric for a given target dataset.
:param target_dataset: The dataset on which the current metric will
be run.
:type target_dataset: :class:`dataset.Dataset`
:returns: The result of evaluating the metric on the target_dataset.
'''
class BinaryMetric(Metric):
'''Abstract Base Class from which all binary metrics inherit.'''
__metaclass__ = ABCMeta
@abstractmethod
def run(self, ref_dataset, target_dataset):
'''Run the metric for the given reference and target datasets.
:param ref_dataset: The Dataset to use as the reference dataset when
running the evaluation.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The Dataset to use as the target dataset when
running the evaluation.
:type target_dataset: :class:`dataset.Dataset`
:returns: The result of evaluation the metric on the reference and
target dataset.
'''
class Bias(BinaryMetric):
'''Calculate the bias between a reference and target dataset.'''
def run(self, ref_dataset, target_dataset):
'''Calculate the bias between a reference and target dataset.
.. note::
Overrides BinaryMetric.run()
:param ref_dataset: The reference dataset to use in this metric run.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run.
:type target_dataset: :class:`dataset.Dataset`
:returns: The difference between the reference and target datasets.
:rtype: :class:`numpy.ndarray`
'''
return calc_bias(target_dataset.values, ref_dataset.values)
class SpatialPatternTaylorDiagram(BinaryMetric):
''' Calculate the target to reference ratio of spatial standard deviation and pattern correlation'''
def run(self, ref_dataset, target_dataset):
'''Calculate two metrics to plot a Taylor diagram to compare spatial patterns
.. note::
Overrides BinaryMetric.run()
:param ref_dataset: The reference dataset to use in this metric run.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run.
:type target_dataset: :class:`dataset.Dataset`
:returns: standard deviation ratio, pattern correlation coefficient
:rtype: :float:'float','float'
'''
return ma.array([calc_stddev_ratio(target_dataset.values, ref_dataset.values), calc_correlation(target_dataset.values, ref_dataset.values)])
class TemporalStdDev(UnaryMetric):
'''Calculate the standard deviation over the time.'''
def run(self, target_dataset):
'''Calculate the temporal std. dev. for a datasets.
.. note::
Overrides UnaryMetric.run()
:param target_dataset: The target_dataset on which to calculate the
temporal standard deviation.
:type target_dataset: :class:`dataset.Dataset`
:returns: The temporal standard deviation of the target dataset
:rtype: :class:`ndarray`
'''
return calc_stddev(target_dataset.values, axis=0)
class StdDevRatio(BinaryMetric):
'''Calculate the standard deviation ratio between two datasets.'''
def run(self, ref_dataset, target_dataset):
'''Calculate the standard deviation ratio.
.. note::
Overrides BinaryMetric.run()
:param ref_dataset: The reference dataset to use in this metric run.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run.
:type target_dataset: :class:`dataset.Dataset`
:returns: The standard deviation ratio of the reference and target
'''
return calc_stddev_ratio(target_dataset.values, ref_dataset.values)
class PatternCorrelation(BinaryMetric):
'''Calculate the correlation coefficient between two datasets'''
def run(self, ref_dataset, target_dataset):
'''Calculate the correlation coefficient between two dataset.
.. note::
Overrides BinaryMetric.run()
:param ref_dataset: The reference dataset to use in this metric run.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run.
:type target_dataset: :class:`dataset.Dataset`
:returns: The correlation coefficient between a reference and target dataset.
'''
# stats.pearsonr returns correlation_coefficient, 2-tailed p-value
# We only care about the correlation coefficient
# Docs at
# http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html
return calc_correlation(target_dataset.values, ref_dataset.values)
class TemporalCorrelation(BinaryMetric):
'''Calculate the temporal correlation coefficients and associated
confidence levels between two datasets, using Pearson's correlation.'''
def run(self, reference_dataset, target_dataset):
'''Calculate the temporal correlation coefficients and associated
confidence levels between two datasets, using Pearson's correlation.
.. note::
Overrides BinaryMetric.run()
:param reference_dataset: The reference dataset to use in this metric
run
:type reference_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run
:type target_dataset: :class:`dataset.Dataset`
:returns: A 2D array of temporal correlation coefficients and a 2D
array of confidence levels associated with the temporal correlation
coefficients
'''
num_times, num_lats, num_lons = reference_dataset.values.shape
coefficients = ma.zeros([num_lats, num_lons])
for i in numpy.arange(num_lats):
for j in numpy.arange(num_lons):
coefficients[i, j] = calc_correlation(
target_dataset.values[:, i, j],
reference_dataset.values[:, i, j])
return coefficients
class TemporalMeanBias(BinaryMetric):
'''Calculate the bias averaged over time.'''
def run(self, ref_dataset, target_dataset):
'''Calculate the bias averaged over time.
.. note::
Overrides BinaryMetric.run()
:param ref_dataset: The reference dataset to use in this metric run.
:type ref_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run.
:type target_dataset: :class:`dataset.Dataset`
:returns: The mean bias between a reference and target dataset over time.
'''
return calc_bias(target_dataset.values, ref_dataset.values, average_over_time=True)
class RMSError(BinaryMetric):
'''Calculate the Root Mean Square Difference (RMS Error), with the mean
calculated over time and space.'''
def run(self, reference_dataset, target_dataset):
'''Calculate the Root Mean Square Difference (RMS Error), with the mean
calculated over time and space.
.. note::
Overrides BinaryMetric.run()
:param reference_dataset: The reference dataset to use in this metric
run
:type reference_dataset: :class:`dataset.Dataset`
:param target_dataset: The target dataset to evaluate against the
reference dataset in this metric run
:type target_dataset: :class:`dataset.Dataset`
:returns: The RMS error, with the mean calculated over time and space
'''
return calc_rmse(target_dataset.values, reference_dataset.values)
def calc_bias(target_array, reference_array, average_over_time=False):
''' Calculate difference between two arrays
:param target_array: an array to be evaluated, as model output
:type target_array: :class:'numpy.ma.core.MaskedArray'
:param reference_array: an array of reference dataset
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:param average_over_time: if True, calculated bias is averaged for the axis=0
:type average_over_time: 'bool'
:returns: Biases array of the target dataset
:rtype: :class:'numpy.ma.core.MaskedArray'
'''
bias = target_array - reference_array
if average_over_time:
return ma.average(bias, axis=0)
else:
return bias
def calc_absbias(target_array, reference_array, average_over_time=False):
''' Calculate absolute difference between two arrays
:param target_array: an array to be evaluated, as model output
:type target_array: :class:'numpy.ma.core.MaskedArray'
:param reference_array: an array of reference dataset
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:param average_over_time: if True, calculated bias is averaged for the axis=0
:type average_over_time: 'bool'
:returns: Absolute Biases array of the target dataset
:rtype: :class:'numpy.ma.core.MaskedArray'
'''
bias = abs(target_array - reference_array)
if average_over_time:
return ma.average(bias, axis=0)
else:
return bias
def calc_stddev(array, axis=None):
''' Calculate a sample standard deviation of an array along the array
:param array: an array to calculate sample standard deviation
:type array: :class:'numpy.ma.core.MaskedArray'
:param axis: Axis along which the sample standard deviation is computed.
:type axis: 'int'
:returns: sample standard deviation of array
:rtype: :class:'numpy.ma.core.MaskedArray'
'''
if isinstance(axis, int):
return ma.std(array, axis=axis, ddof=1)
else:
return ma.std(array, ddof=1)
def calc_stddev_ratio(target_array, reference_array):
''' Calculate ratio of standard deivations of the two arrays
:param target_array: an array to be evaluated, as model output
:type target_array: :class:'numpy.ma.core.MaskedArray'
:param reference_array: an array of reference dataset
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:param average_over_time: if True, calculated bias is averaged for the axis=0
:type average_over_time: 'bool'
:returns: (standard deviation of target_array)/(standard deviation of reference array)
:rtype: :class:'float'
'''
return calc_stddev(target_array) / calc_stddev(reference_array)
def calc_correlation(target_array, reference_array):
'''Calculate the correlation coefficient between two arrays.
:param target_array: an array to be evaluated, as model output
:type target_array: :class:'numpy.ma.core.MaskedArray'
:param reference_array: an array of reference dataset
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:returns: pearson's correlation coefficient between the two input arrays
:rtype: :class:'numpy.ma.core.MaskedArray'
'''
return mstats.pearsonr(reference_array.flatten(), target_array.flatten())[0]
def calc_rmse(target_array, reference_array):
''' Calculate ratio of standard deivations of the two arrays
:param target_array: an array to be evaluated, as model output
:type target_array: :class:'numpy.ma.core.MaskedArray'
:param reference_array: an array of reference dataset
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:param average_over_time: if True, calculated bias is averaged for the axis=0
:type average_over_time: 'bool'
:returns: root mean square error
:rtype: :class:'float'
'''
return (ma.mean((calc_bias(target_array, reference_array))**2))**0.5
def calc_histogram_overlap(hist1, hist2):
''' from Lee et al. (2014)
:param hist1: a histogram array
:type hist1: :class:'numpy.ndarray'
:param hist2: a histogram array with the same size as hist1
:type hist2: :class:'numpy.ndarray'
'''
hist1_flat = hist1.flatten()
hist2_flat = hist2.flatten()
if len(hist1_flat) != len(hist2_flat):
err = "The two histograms have different sizes"
raise ValueError(err)
overlap = 0.
for ii in numpy.arange(len(hist1_flat)):
overlap = overlap + numpy.min([hist1_flat[ii], hist2_flat[ii]])
return overlap
def calc_joint_histogram(data_array1, data_array2, bins_for_data1, bins_for_data2):
''' Calculate a joint histogram of two variables in data_array1 and data_array2
:param data_array1: the first variable
:type data_array1: :class:'numpy.ma.core.MaskedArray'
:param data_array2: the second variable
:type data_array2: :class:'numpy.ma.core.MaskedArray'
:param bins_for_data1: histogram bin edges for data_array1
:type bins_for_data1: :class:'numpy.ndarray'
:param bins_for_data2: histogram bin edges for data_array2
:type bins_for_data2: :class:'numpy.ndarray'
'''
if ma.count_masked(data_array1) != 0 or ma.count_masked(data_array2) != 0:
index = numpy.where((data_array1.mask == False) &
(data_array2.mask == False))
new_array1 = data_array1[index]
new_array2 = data_array2[index]
else:
new_array1 = data_array1.flatten()
new_array2 = data_array2.flatten()
histo2d, xedge, yedge = numpy.histogram2d(new_array1, new_array2, bins=[
bins_for_data1, bins_for_data2])
return histo2d
def wet_spell_analysis(reference_array, threshold=0.1, nyear=1, dt=3.):
''' Characterize wet spells using sub-daily (hourly) data
:param reference_array: an array to be analyzed
:type reference_array: :class:'numpy.ma.core.MaskedArray'
:param threshold: the minimum amount of rainfall [mm/hour]
:type threshold: 'float'
:param nyear: the number of discontinous periods
:type nyear: 'int'
:param dt: the temporal resolution of reference_array
:type dt: 'float'
'''
nt = reference_array.shape[0]
if reference_array.ndim == 3:
reshaped_array = reference_array.reshape([nt, reference_array.size / nt])
else:
reshaped_array = reference_array
if ma.count_masked(reshaped_array[0,:]) != 0:
xy_indices = numpy.where(reshaped_array.mask[0, :] == False)[0]
else:
xy_indices = numpy.arange(reshaped_array.shape[1])
nt_each_year = nt / nyear
spell_duration = []
peak_rainfall = []
total_rainfall = []
for index in xy_indices:
for iyear in numpy.arange(nyear):
data0_temp = reshaped_array[nt_each_year * iyear:nt_each_year * (iyear + 1),
index]
# time indices when precipitation rate is smaller than the
# threshold [mm/hr]
t_index = numpy.where((data0_temp <= threshold) &
(data0_temp.mask == False))[0]
t_index = numpy.insert(t_index, 0, 0)
t_index = t_index + nt_each_year * iyear
for it in numpy.arange(t_index.size - 1):
if t_index[it + 1] - t_index[it] > 1:
data1_temp = data0_temp[t_index[it] + 1:t_index[it + 1]]
if not ma.is_masked(data1_temp):
spell_duration.append(
(t_index[it + 1] - t_index[it] - 1) * dt)
peak_rainfall.append(data1_temp.max())
total_rainfall.append(data1_temp.sum())
return numpy.array(spell_duration), numpy.array(peak_rainfall), numpy.array(total_rainfall)