-
Notifications
You must be signed in to change notification settings - Fork 26
/
qmc_analysis.py
333 lines (277 loc) · 12.3 KB
/
qmc_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
"""Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.
Please refer to the article below for the basic approach used here.
https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis
"""
import logging
import numpy as np
from easyvvuq import OutputType
from .base import BaseAnalysisElement
from easyvvuq.sampling import QMCSampler
from .results import AnalysisResults
from easyvvuq.sampling import MCSampler
from .ensemble_boot import confidence_interval
__author__ = 'Jalal Lakhlili'
__license__ = "LGPL"
logger = logging.getLogger(__name__)
class QMCAnalysisResults(AnalysisResults):
"""Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class
documentation for details on using it.
"""
def _get_sobols_first(self, qoi, input_):
raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
def _get_sobols_total(self, qoi, input_):
raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total'])
return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
def _get_sobols_second(self, qoi, input_):
raise NotImplementedError
def _get_sobols_first_conf(self, qoi, input_):
raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first'])
return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
def _get_sobols_total_conf(self, qoi, input_):
raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total'])
return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
def supported_stats(self):
"""Types of statistics supported by the describe method.
Returns
-------
list of str
"""
return ['mean', 'var', 'std']
def _describe(self, qoi, statistic):
if statistic not in self.supported_stats():
raise NotImplementedError
return self.raw_data['statistical_moments'][qoi][statistic][0]
class QMCAnalysis(BaseAnalysisElement):
def __init__(self, sampler, qoi_cols=None):
"""Analysis element for Quasi-Monte Carlo (QMC).
Parameters
----------
sampler : easyvvuq.sampling.qmc.QMCSampler
Sampler used to initiate the QMC analysis
qoi_cols : list or None
Column names for quantities of interest (for which analysis is to be
performed).
"""
if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler):
raise RuntimeError(
'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component')
if qoi_cols is None:
self.qoi_cols = list(sampler.vary.get_keys())
else:
self.qoi_cols = qoi_cols
self.output_type = OutputType.SUMMARY
self.sampler = sampler
def element_name(self):
"""Name for this element.
Return
------
str:
"QMC_Analysis"
"""
return "QMC_Analysis"
def element_version(self):
"""Version of this element.
Return
------
str:
Element version.
"""
return "0.2"
def analyse(self, data_frame):
"""Perform QMC analysis on a given pandas DataFrame.
Parameters
----------
data_frame : pandas DataFrame
Input data for analysis.
Returns
-------
easyvvuq.analysis.qmc.QMCAnalysisResults
AnalysisResults object for QMC.
"""
if data_frame.empty:
raise RuntimeError(
"No data in data frame passed to analyse element")
qoi_cols = self.qoi_cols
results = {
'statistical_moments': {k: {} for k in qoi_cols},
'sobols_first': {k: {} for k in qoi_cols},
'sobols_total': {k: {} for k in qoi_cols},
'conf_sobols_first': {k: {} for k in qoi_cols},
'conf_sobols_total': {k: {} for k in qoi_cols}
}
# Extract output values for each quantity of interest from Dataframe
samples = self.get_samples(data_frame)
# Compute descriptive statistics for each quantity of interest
for k in qoi_cols:
results['statistical_moments'][k] = {'mean': np.mean(samples[k], axis=0),
'var': np.var(samples[k], axis=0),
'std': np.std(samples[k], axis=0)}
sobols_first, conf_first, sobols_total, conf_total = \
self.sobol_bootstrap(samples[k])
results['sobols_first'][k] = sobols_first
results['sobols_total'][k] = sobols_total
results['conf_sobols_first'][k] = conf_first
results['conf_sobols_total'][k] = conf_total
return QMCAnalysisResults(raw_data=results, samples=data_frame,
qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
def get_samples(self, data_frame):
"""
Converts the Pandas dataframe into a dictionary.
Parameters
----------
data_frame : pandas DataFrame
the EasyVVUQ Pandas dataframe from collation.
Returns
-------
dict :
A dictionary with the QoI names as keys.
Each element is a list of code evaluations.
"""
samples = {k: [] for k in self.qoi_cols}
for run_id in data_frame['run_id'].squeeze().unique():
for k in self.qoi_cols:
data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k]
samples[k].append(data.values)
return samples
def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
"""
Computes the first order and total order Sobol indices using Saltelli's
method. To assess the sampling inaccuracy, bootstrap confidence intervals
are also computed.
Reference: A. Saltelli, Making best use of model evaluations to compute
sensitivity indices, Computer Physics Communications, 2002.
Parameters
----------
samples : list
The samples for a given QoI.
alpha: float
The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
n_samples: int
The number of bootstrap samples. The default is 1000.
Returns
-------
sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
dictionaries containing the first- and total-order Sobol indices for all
parameters, and (1-alpha)*100 lower and upper confidence bounds.
"""
assert len(samples) > 0
assert alpha > 0.0
assert alpha < 1.0
assert n_samples > 0
# convert to array
samples = np.array(samples)
# the number of parameter and the number of MC samples in n_mc * (n_params + 2)
# and the size of the QoI
n_params = self.sampler.n_params
n_mc = self.sampler.n_mc_samples
n_qoi = samples[0].size
sobols_first_dict = {}
conf_first_dict = {}
sobols_total_dict = {}
conf_total_dict = {}
for j, param_name in enumerate(self.sampler.vary.get_keys()):
# code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
# see reference above.
f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
# our point estimate for the 1st and total order Sobol indices
value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
# array for resampled estimates
sobols_first = np.zeros([n_samples, n_qoi])
sobols_total = np.zeros([n_samples, n_qoi])
for i in range(n_samples):
# resample, must be done on already seperated output due to
# the specific order in samples
idx = np.random.randint(0, n_mc - 1, n_mc)
f_M2_resample = f_M2[idx]
f_M1_resample = f_M1[idx]
f_Ni_resample = f_Ni[idx]
# recompute Sobol indices
sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
f_Ni_resample[:, j])
sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
f_Ni_resample[:, j])
# compute confidence intervals
_, low_first, high_first = confidence_interval(sobols_first, value_first,
alpha, pivotal=True)
_, low_total, high_total = confidence_interval(sobols_total, value_total,
alpha, pivotal=True)
# store results
sobols_first_dict[param_name] = value_first
conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
sobols_total_dict[param_name] = value_total
conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
# Adapted from SALib
@staticmethod
def _separate_output_values(samples, n_params, n_mc_samples):
"""There are n_params + 2 different input matrices: M1, M2, N_i,
i=1,...,n_params. (see reference under sobol_bootstrap). The
EasyVVUQ dataframe is stored in the order:
[sample from M2, sample from N1, N2, ... sample from N_n_params,
sample from M1, repeat].
This subroutine separates the output values into the contributions
of the different input matrices.
Parameters
----------
samples: list
The samples for a given QoI
n_params: int
The number of uncertain input parameters.
n_mc_samples: int
The number of MC samples per input matrix, i.e. the
number of rows in M1, M2 or Ni.
Returns
-------
NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where
f_Ni contains n_params entries corresponding to the n_params Ni matrices.
"""
evaluations = np.array(samples)
shape = (n_mc_samples, n_params) + evaluations[0].shape
step = n_params + 2
f_Ni = np.zeros(shape)
f_M2 = evaluations[0:evaluations.shape[0]:step]
f_M1 = evaluations[(step - 1):evaluations.shape[0]:step]
for i in range(n_params):
f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step]
return f_M2, f_M1, f_Ni
@staticmethod
def _first_order(f_M2, f_M1, f_Ni):
"""Calculate first order sensitivity indices.
Parameters
----------
f_M2: NumPy array
Array of code evaluations on input array M2
f_M1: NumPy array
Array of code evaluations on input array M1
f_Ni: NumPy array
Array of code evaluations on input array Ni, i=1,...,n_params
Returns
-------
A NumPy array of the n_params first-order Sobol indices.
"""
V = np.var(np.r_[f_M2, f_M1], axis=0)
return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
@staticmethod
def _total_order(f_M2, f_M1, f_Ni):
"""Calculate total order sensitivity indices. See also:
A Saltelli et al, Variance based sensitivity analysis of model output.
Design and estimator for the total sensitivity index, 2009.
Parameters
----------
f_M2: NumPy array
Array of code evaluations on input array M2 (matrix A in ref above)
f_M1: NumPy array
Array of code evaluations on input array M1 (matrix B in ref above)
f_Ni: NumPy array
Array of code evaluations on input array Ni, i=1,...,n_params
(matrix AB in ref above)
Returns
-------
A NumPy array of the n_params total-order Sobol indices.
"""
V = np.var(np.r_[f_M2, f_M1], axis=0)
return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)