-
Notifications
You must be signed in to change notification settings - Fork 20
/
highlevel.py
405 lines (326 loc) · 13 KB
/
highlevel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
"""High-level correctionlib objects
"""
import json
from numbers import Integral
from typing import Any, Callable, Dict, Iterator, List, Mapping, Union
import numpy
from packaging import version
import correctionlib._core
import correctionlib.version
# Oldest awkward release accepted by _call_as_numpy; older versions raise RuntimeError there.
_min_version_ak = version.parse("2.0.0")
# Oldest dask_awkward release accepted by _wrap_dask_awkward; older versions raise RuntimeError there.
_min_version_dak = version.parse("2024.1.1")
def open_auto(filename: str) -> str:
    """Open a correction file and return its contents as text.

    The format is chosen from the file name suffix: ``.json.gz`` files are
    decompressed with ``gzip``, ``.json`` files are read directly. Both are
    decoded as UTF-8 (the encoding JSON interchange uses) instead of the
    platform's locale encoding, so the result does not depend on the machine
    the file is read on.

    Args:
        filename: Path to a ``.json`` or ``.json.gz`` file.

    Returns:
        The decoded text content of the file.

    Raises:
        ValueError: If ``filename`` ends with neither ``.json`` nor ``.json.gz``.
    """
    if filename.endswith(".json.gz"):
        import gzip

        with gzip.open(filename, "rt", encoding="utf-8") as gzfile:
            return gzfile.read()
    if filename.endswith(".json"):
        with open(filename, encoding="utf-8") as file:
            return file.read()
    raise ValueError(f"{filename}: unrecognized file format, expected .json, .json.gz")
def model_auto(data: str) -> Any:
    """Parse a JSON string and validate it with the matching schema model.

    The ``schema_version`` key of the decoded object selects which
    ``correctionlib.schemavN`` module supplies the ``CorrectionSet`` model.

    Raises:
        ValueError: If the decoded JSON is not a dictionary, has no
            ``schema_version``, has a non-integer ``schema_version``, or has
            a version other than 1 or 2.
    """
    payload = json.loads(data)
    if not isinstance(payload, dict):
        raise ValueError("CorrectionSet is not a dictionary!")

    schema_version = payload.get("schema_version", None)
    if schema_version is None:
        raise ValueError("CorrectionSet has no schema version!")
    if not isinstance(schema_version, Integral):
        raise ValueError(
            f"CorrectionSet schema version ({schema_version}) is not an integer!"
        )

    # Schema modules are imported lazily so only the requested one is loaded.
    if schema_version == 1:
        import correctionlib.schemav1

        model_cls = correctionlib.schemav1.CorrectionSet
        return model_cls.model_validate(payload)
    if schema_version == 2:
        import correctionlib.schemav2

        model_cls = correctionlib.schemav2.CorrectionSet
        return model_cls.model_validate(payload)

    raise ValueError(f"Unknown CorrectionSet schema version ({schema_version})")
# TODO: create a ufunc with numpy.vectorize in constructor?
def _call_as_numpy(
    array_args: Any,
    func: Callable[..., Any] = lambda: None,
    non_array_args: Any = tuple(),
    arg_indices: Any = tuple(),
    **kwargs: Any,
) -> Any:
    """Evaluate ``func`` on numpy views of awkward layouts.

    Used by ``_wrap_awkward`` as the callback handed to ``awkward.transform``.

    Args:
        array_args: One layout or a list/tuple of layouts.
        func: Callable invoked with the reassembled positional arguments.
        non_array_args: Values passed through to ``func`` unchanged.
        arg_indices: Target position in the ``func`` call for each argument,
            array arguments first, then ``non_array_args``.
        **kwargs: Accepted and ignored.

    Returns:
        An ``awkward.contents.NumpyArray`` holding the output of ``func``
        reshaped to the broadcast shape of the inputs (converted to a
        typetracer when the inputs use the typetracer backend), or ``None``
        when some input is an awkward ``Content`` that is not numpy-like.
        NOTE(review): ``None`` presumably tells ``awkward.transform`` to keep
        descending - confirm against the awkward documentation.

    Raises:
        RuntimeError: If the imported awkward is older than ``_min_version_ak``.
    """
    import awkward

    if version.parse(awkward.__version__) < _min_version_ak:
        raise RuntimeError(
            f"""imported awkward is version {awkward.__version__} < {str(_min_version_ak)}
If you cannot upgrade, try doing: ak.flatten(arrays) -> result = correction(arrays) -> ak.unflatten(result, counts)
"""
        )

    if not isinstance(array_args, (list, tuple)):
        array_args = (array_args,)

    # Bail out as soon as one layout cannot be viewed as a plain numpy array.
    for layout in array_args:
        if not (layout.is_numpy or not isinstance(layout, awkward.contents.Content)):
            return None

    numpy_inputs = [
        awkward.to_numpy(awkward.typetracer.length_zero_if_typetracer(layout))
        for layout in array_args
    ]
    broadcasted = numpy.broadcast_arrays(*numpy_inputs)
    out_shape = broadcasted[0].shape
    flattened = (item.flatten() for item in broadcasted)

    # Put every argument back at the position it had in the original call.
    n_arrays = len(array_args)
    call_args = [None] * len(arg_indices)
    for position, target in enumerate(arg_indices):
        if position < n_arrays:
            call_args[target] = next(flattened)
        else:
            call_args[target] = non_array_args[position - n_arrays]

    result = func(*call_args)
    result = awkward.contents.NumpyArray(result.reshape(out_shape))
    if awkward.backend(*array_args) == "typetracer":
        result = result.to_typetracer(forget_length=True)
    return result
def _wrap_awkward(
    func: Callable[..., Any],
    *args: Union["numpy.ndarray[Any, Any]", str, int, float],
) -> Any:
    """Apply ``func`` to awkward-array arguments through ``awkward.transform``.

    Arguments that are ``str``, ``int`` or ``float`` instances are treated as
    plain values; every other argument is treated as an array and broadcast
    with ``awkward.broadcast_arrays``. ``_call_as_numpy`` is used as the
    transform callback and restores the original argument order before
    calling ``func``.
    """
    from functools import partial

    import awkward

    plain_types = (str, int, float)
    array_slots = [
        slot for slot, value in enumerate(args) if not isinstance(value, plain_types)
    ]
    plain_slots = [
        slot for slot, value in enumerate(args) if isinstance(value, plain_types)
    ]

    broadcasted = awkward.broadcast_arrays(*[args[slot] for slot in array_slots])
    callback = partial(
        _call_as_numpy,
        func=func,  # type: ignore
        non_array_args=[args[slot] for slot in plain_slots],
        # Array positions first, then plain-value positions.
        arg_indices=array_slots + plain_slots,
    )
    return awkward.transform(callback, *broadcasted)
def _call_dask_correction(
    correction: Any,
    *args: Union["numpy.ndarray[Any, Any]", str, int, float],
):
    """Evaluate ``correction`` on ``args`` via ``_wrap_awkward``.

    This is the function ``_wrap_dask_awkward`` hands to
    ``dask_awkward.map_partitions``.
    """
    vectorized_eval = correction._base.evalv
    return _wrap_awkward(vectorized_eval, *args)
def _wrap_dask_awkward(
    correction: Any,
    *args: Union["numpy.ndarray[Any, Any]", str, int, float],
) -> Any:
    """Build a lazy dask_awkward evaluation of ``correction`` on ``args``.

    A ``dask.delayed`` wrapper of the correction is created once and stored
    on the object as ``_delayed_correction``. The output meta is computed by
    running ``_wrap_awkward`` with each ``dask_awkward.Array`` argument
    replaced by its ``_meta``. The evaluation itself is scheduled with
    ``dask_awkward.map_partitions``.

    Raises:
        RuntimeError: If the imported dask_awkward is older than
            ``_min_version_dak``.
    """
    import dask.delayed
    import dask_awkward

    installed = version.parse(dask_awkward.__version__)
    if installed < _min_version_dak:
        raise RuntimeError(
            f"""imported dask_awkward is version {dask_awkward.__version__} < {str(_min_version_dak)}
This version of dask_awkward includes several useful bugfixes and functionality extensions.
Please upgrade dask_awkward.
"""
        )

    # Reuse the delayed wrapper if this correction has been wrapped before.
    if not hasattr(correction, "_delayed_correction"):
        correction._delayed_correction = dask.delayed(correction)

    meta_inputs = [
        value._meta if isinstance(value, dask_awkward.Array) else value
        for value in args
    ]
    output_meta = _wrap_awkward(correction._base.evalv, *meta_inputs)

    return dask_awkward.map_partitions(
        _call_dask_correction,
        correction._delayed_correction,
        *args,
        meta=output_meta,
        label=correction._name,
    )
class Correction:
    """High-level correction evaluator object

    This class is typically instantiated by accessing a named correction from
    a CorrectionSet object, rather than directly by construction.
    """

    def __init__(self, base: correctionlib._core.Correction, context: "CorrectionSet"):
        self._base = base
        self._name = base.name
        self._context = context

    def __getstate__(self) -> Dict[str, Any]:
        # ``_base`` is left out of the state; it is looked up again from the
        # owning set in __setstate__.
        return {"_context": self._context, "_name": self._name}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self._context = state["_context"]
        self._name = state["_name"]
        self._base = self._context[self._name]._base

    @property
    def name(self) -> str:
        """Name of the correction, as reported by the underlying object."""
        return self._name

    @property
    def description(self) -> str:
        """Description of the underlying correction."""
        return self._base.description

    @property
    def version(self) -> int:
        """Version of the underlying correction."""
        return self._base.version

    @property
    def inputs(self) -> List[correctionlib._core.Variable]:
        """Input variables of the underlying correction."""
        return self._base.inputs

    @property
    def output(self) -> correctionlib._core.Variable:
        """Output variable of the underlying correction."""
        return self._base.output

    def evaluate(
        self, *args: Union["numpy.ndarray[Any, Any]", str, int, float]
    ) -> Union[float, "numpy.ndarray[Any, numpy.dtype[numpy.float64]]"]:
        """Evaluate the correction on scalar and/or array arguments.

        Arguments that are ``str``, ``int`` or ``float`` instances are passed
        through as-is. All other arguments are converted with
        ``numpy.asarray``, broadcast against each other, flattened and given
        to the vectorized ``evalv`` of the underlying object; the output is
        reshaped to the broadcast shape. With no array arguments the scalar
        ``evaluate`` of the underlying object is called.

        If the numpy conversion raises ``NotImplementedError`` and a
        dask_awkward argument is present, evaluation is delegated to
        ``_wrap_dask_awkward``. If it raises ``ValueError`` or ``TypeError``
        and an awkward argument is present, evaluation is delegated to
        ``_wrap_awkward``.

        Raises:
            TypeError: If any argument is a ``dask.array`` collection.
            NotImplementedError, ValueError, TypeError: The original
                ``numpy.asarray`` failure, when no dask_awkward / awkward
                argument explains it.
        """
        # TODO: create a ufunc with numpy.vectorize in constructor?
        if any(str(type(arg)).startswith("<class 'dask.array.") for arg in args):
            raise TypeError(
                "Correctionlib does not yet handle dask.array collections. "
                "If you require this functionality (i.e. you cannot or do "
                "not want to use dask_awkward/awkward arrays) please open an "
                "issue at https://github.com/cms-nanoAOD/correctionlib/issues."
            )
        try:
            vargs = [
                numpy.asarray(arg)
                for arg in args
                if not isinstance(arg, (str, int, float))
            ]
        except NotImplementedError:
            if any(str(type(arg)).startswith("<class 'dask_awkward.") for arg in args):
                return _wrap_dask_awkward(self, *args)  # type: ignore
            # ``vargs`` was never bound, so continuing would only fail with an
            # unrelated UnboundLocalError; surface the conversion error itself.
            raise
        except (ValueError, TypeError):
            if any(str(type(arg)).startswith("<class 'awkward.") for arg in args):
                return _wrap_awkward(self._base.evalv, *args)  # type: ignore
            # Same reasoning as above: report the real conversion failure.
            raise

        if vargs:
            bargs = numpy.broadcast_arrays(*vargs)
            oshape = bargs[0].shape
            fargs = (arg.flatten() for arg in bargs)
            # Interleave flattened arrays with the untouched scalar arguments
            # in the original positional order.
            out = self._base.evalv(
                *(
                    next(fargs) if not isinstance(arg, (str, int, float)) else arg
                    for arg in args
                )
            )
            return out.reshape(oshape)
        return self._base.evaluate(*args)  # type: ignore
class CompoundCorrection:
    """High-level compound correction evaluator object

    This class is typically instantiated by accessing a named correction from
    a CorrectionSet object, rather than directly by construction.
    """

    def __init__(
        self, base: correctionlib._core.CompoundCorrection, context: "CorrectionSet"
    ):
        self._base = base
        self._name = base.name
        self._context = context

    def __getstate__(self) -> Dict[str, Any]:
        # ``_base`` is left out of the state; it is looked up again from the
        # owning set's ``compound`` mapping in __setstate__.
        return {"_context": self._context, "_name": self._name}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self._context = state["_context"]
        self._name = state["_name"]
        self._base = self._context.compound[self._name]._base

    @property
    def name(self) -> str:
        """Name of the compound correction, as reported by the underlying object."""
        return self._name

    @property
    def description(self) -> str:
        """Description of the underlying compound correction."""
        return self._base.description

    @property
    def inputs(self) -> List[correctionlib._core.Variable]:
        """Input variables of the underlying compound correction."""
        return self._base.inputs

    @property
    def output(self) -> correctionlib._core.Variable:
        """Output variable of the underlying compound correction."""
        return self._base.output

    def evaluate(
        self, *args: Union["numpy.ndarray[Any, Any]", str, int, float]
    ) -> Union[float, "numpy.ndarray[Any, numpy.dtype[numpy.float64]]"]:
        """Evaluate the compound correction on scalar and/or array arguments.

        Arguments that are ``str``, ``int`` or ``float`` instances are passed
        through as-is. All other arguments are converted with
        ``numpy.asarray``, broadcast against each other, flattened and given
        to the vectorized ``evalv`` of the underlying object; the output is
        reshaped to the broadcast shape. With no array arguments the scalar
        ``evaluate`` of the underlying object is called.

        If the numpy conversion raises ``NotImplementedError`` and a
        dask_awkward argument is present, evaluation is delegated to
        ``_wrap_dask_awkward``. If it raises ``ValueError`` or ``TypeError``
        and an awkward argument is present, evaluation is delegated to
        ``_wrap_awkward``.

        Raises:
            TypeError: If any argument is a ``dask.array`` collection.
            NotImplementedError, ValueError, TypeError: The original
                ``numpy.asarray`` failure, when no dask_awkward / awkward
                argument explains it.
        """
        # TODO: create a ufunc with numpy.vectorize in constructor?
        if any(str(type(arg)).startswith("<class 'dask.array.") for arg in args):
            raise TypeError(
                "Correctionlib does not yet handle dask.array collections. "
                "if you require this functionality (i.e. you cannot or do "
                "not want to use dask_awkward/awkward arrays) please open an "
                "issue at https://github.com/cms-nanoAOD/correctionlib/issues."
            )
        try:
            vargs = [
                numpy.asarray(arg)
                for arg in args
                if not isinstance(arg, (str, int, float))
            ]
        except NotImplementedError:
            if any(str(type(arg)).startswith("<class 'dask_awkward.") for arg in args):
                return _wrap_dask_awkward(self, *args)  # type: ignore
            # ``vargs`` was never bound, so continuing would only fail with an
            # unrelated UnboundLocalError; surface the conversion error itself.
            raise
        except (ValueError, TypeError):
            if any(str(type(arg)).startswith("<class 'awkward.") for arg in args):
                return _wrap_awkward(self._base.evalv, *args)  # type: ignore
            # Same reasoning as above: report the real conversion failure.
            raise

        if vargs:
            bargs = numpy.broadcast_arrays(*vargs)
            oshape = bargs[0].shape
            fargs = (arg.flatten() for arg in bargs)
            # Interleave flattened arrays with the untouched scalar arguments
            # in the original positional order.
            out = self._base.evalv(
                *(
                    next(fargs) if not isinstance(arg, (str, int, float)) else arg
                    for arg in args
                )
            )
            return out.reshape(oshape)
        return self._base.evaluate(*args)  # type: ignore
class _CompoundMap(Mapping[str, CompoundCorrection]):
    """Read-only mapping from name to ``CompoundCorrection`` wrapper.

    Lookups are forwarded to the wrapped low-level mapping, and every hit is
    wrapped in a new ``CompoundCorrection`` bound to the owning set.
    """

    def __init__(
        self,
        base: Mapping[str, correctionlib._core.CompoundCorrection],
        context: "CorrectionSet",
    ):
        self._context = context
        self._base = base

    def __getitem__(self, key: str) -> CompoundCorrection:
        return CompoundCorrection(self._base[key], self._context)

    def __len__(self) -> int:
        size = len(self._base)
        return size

    def __iter__(self) -> Iterator[str]:
        names = iter(self._base)
        return names
class CorrectionSet(Mapping[str, Correction]):
    """High-level correction set evaluator object

    This class can be initialized directly from a string or model with compatible
    schema version, or can be initialized via the ``from_file`` or
    ``from_string`` factory methods. Corrections can be accessed
    via getitem syntax, e.g. ``cset["some correction"]``.
    """

    def __init__(self, data: Any):
        # Keep the JSON text: it is what gets pickled and what the low-level
        # set is built from. Non-string input is serialized via model_dump_json.
        json_text = (
            data if isinstance(data, str) else data.model_dump_json(exclude_unset=True)
        )
        self._data = json_text
        self._base = correctionlib._core.CorrectionSet.from_string(self._data)

    @classmethod
    def from_file(cls, filename: str) -> "CorrectionSet":
        """Build a set from a file read with ``open_auto``."""
        contents = open_auto(filename)
        return cls(contents)

    @classmethod
    def from_string(cls, data: str) -> "CorrectionSet":
        """Build a set from a JSON string."""
        return cls(data)

    def __getstate__(self) -> Dict[str, Any]:
        # Only the JSON text is stored; ``_base`` is rebuilt in __setstate__.
        return {"_data": self._data}

    def __setstate__(self, state: Dict[str, Any]) -> None:
        json_text = state["_data"]
        self._data = json_text
        self._base = correctionlib._core.CorrectionSet.from_string(self._data)

    def _ipython_key_completions_(self) -> List[str]:
        return list(self)

    @property
    def schema_version(self) -> int:
        """Schema version reported by the underlying set."""
        return self._base.schema_version

    def __getitem__(self, key: str) -> Correction:
        return Correction(self._base[key], self)

    def __len__(self) -> int:
        size = len(self._base)
        return size

    def __iter__(self) -> Iterator[str]:
        names = iter(self._base)
        return names

    @property
    def compound(self) -> _CompoundMap:
        """Mapping of compound corrections, bound to this set."""
        compound_base = self._base.compound
        return _CompoundMap(compound_base, self)