# facade.py
# 310 lines (270 loc) · 9.12 KB
from typing import Optional
import numpy as np
from physt.util import deprecation_alias
from physt.histogram1d import Histogram1D
from physt.histogram_nd import HistogramND, Histogram2D
from physt.binnings import calculate_bins_nd
from physt.histogram_collection import HistogramCollection
from physt.special_histograms import (
polar,
azimuthal,
radial,
cylindrical,
cylindrical_surface,
spherical,
spherical_surface,
)
def h1(data, bins=None, *args, **kwargs) -> Histogram1D:
    """Facade function to create 1D histograms.

    This proceeds in three steps:
    1) Based on magical parameter bins, construct bins for the histogram
    2) Calculate frequencies for the bins
    3) Construct the histogram object itself

    *Guiding principle:* parameters understood by numpy.histogram should be
    understood also by physt.histogram as well and should result in a Histogram1D
    object with (h.numpy_bins, h.frequencies) same as the numpy.histogram
    output. Additional functionality is a bonus.

    Parameters
    ----------
    data : array_like, optional
        Container of all the values (tuple, list, np.ndarray, pd.Series).
        A ``(name, values)`` tuple whose first item is a string (as produced
        by iterating a groupby) is histogrammed as ``values`` with ``name``
        used as the histogram name. If None, no frequencies are calculated.
    bins: int or sequence of scalars or callable or str, optional
        If iterable => the bins themselves
        If int => number of bins for default binning
        If callable => use binning method (+ args, kwargs)
        If string => use named binning method (+ args, kwargs)
    weights: array_like, optional
        (as numpy.histogram)
    keep_missed: Optional[bool]
        store statistics about how many values were lower than limits
        and how many higher than limits (default: True)
    dropna: bool
        whether to clear data from nan's before histogramming (default: True)
    name: str
        name of the histogram
    title: str
        title of the histogram (passed to the Histogram1D constructor)
    axis_name: str
        name of the variable on x axis; if not given, it is taken from
        ``data.name`` or from a single string entry in ``data.fields``
    adaptive: bool
        whether we want the bins to be modifiable
        (useful for continuous filling of a priori unknown data)
    dtype: type
        customize underlying data type: default int64 (without weight) or float (with weights)

    Other numpy.histogram parameters are excluded, see the methods of the Histogram1D class itself.

    Returns
    -------
    Histogram1D

    Raises
    ------
    ValueError
        If data is a pandas DataFrame (pass a single Series instead).

    See Also
    --------
    numpy.histogram
    """
    from .histogram1d import Histogram1D, calculate_frequencies
    from .binnings import calculate_bins

    # Works for groupby DataSeries: a (name, values) pair.
    # This dispatch happens before any keyword is popped from kwargs so that
    # every option (including adaptive and dtype) reaches the nested call, and
    # it goes through h1 itself rather than the legacy `histogram` alias.
    if isinstance(data, tuple) and len(data) > 0 and isinstance(data[0], str):
        return h1(data[1], bins, *args, name=data[0], **kwargs)
    if type(data).__name__ == "DataFrame":
        # Checked by type name so that pandas does not have to be imported.
        raise ValueError("Cannot create histogram from a pandas DataFrame. Use Series.")

    # Collect arguments (not to send them to binning algorithms)
    adaptive = kwargs.pop("adaptive", False)
    dtype = kwargs.pop("dtype", None)
    dropna = kwargs.pop("dropna", True)
    weights = kwargs.pop("weights", None)
    keep_missed = kwargs.pop("keep_missed", True)
    name = kwargs.pop("name", None)
    axis_name = kwargs.pop("axis_name", None)
    title = kwargs.pop("title", None)

    # Convert to array
    if data is not None:
        array = np.asarray(data)
        if dropna:
            array = array[~np.isnan(array)]
    else:
        array = None

    # Get binning; whatever is left in kwargs belongs to the binning method
    binning = calculate_bins(
        array,
        bins,
        *args,
        check_nan=not dropna and array is not None,
        adaptive=adaptive,
        **kwargs
    )

    # Get frequencies
    if array is not None:
        (frequencies, errors2, underflow, overflow, stats) = calculate_frequencies(
            array, binning=binning, weights=weights, dtype=dtype
        )
    else:
        frequencies = None
        errors2 = None
        underflow = 0
        overflow = 0
        stats = {"sum": 0.0, "sum2": 0.0}

    # Construct the object
    if not keep_missed:
        underflow = 0
        overflow = 0
    if not axis_name:
        # `data` (not `array`) is inspected: the array conversion drops metadata.
        if hasattr(data, "name"):
            axis_name = data.name
        elif (
            hasattr(data, "fields")
            and len(data.fields) == 1
            and isinstance(data.fields[0], str)
        ):
            # Case of dask fields (examples)
            axis_name = data.fields[0]
    return Histogram1D(
        binning=binning,
        frequencies=frequencies,
        errors2=errors2,
        overflow=overflow,
        underflow=underflow,
        stats=stats,
        dtype=dtype,
        keep_missed=keep_missed,
        name=name,
        axis_name=axis_name,
        title=title,
    )
def h2(data1, data2, bins=10, *args, **kwargs) -> Histogram2D:
    """Facade function to create 2D histograms.

    For implementation and parameters, see h (also available under the
    legacy name histogramdd).

    Parameters
    ----------
    data1 : array_like, optional
        Values along the first axis.
    data2 : array_like, optional
        Values along the second axis.
    bins : Any
        Passed on to h.
    *args, **kwargs
        Passed on to h. If "axis_names" is not among them and both inputs
        have a ``name`` attribute, the two names are used as axis names.

    See Also
    --------
    numpy.histogram2d
    h
    """
    # guess axis names
    if "axis_names" not in kwargs:
        if hasattr(data1, "name") and hasattr(data2, "name"):
            kwargs["axis_names"] = [data1.name, data2.name]
    if data1 is not None and data2 is not None:
        data1 = np.asarray(data1)
        data2 = np.asarray(data2)
        # Put the two components side by side as columns: shape (n, 2) for 1D inputs
        data = np.concatenate([data1[:, np.newaxis], data2[:, np.newaxis]], axis=1)
    else:
        # NOTE(review): if only one of the inputs is None, the other one is
        # ignored and no data is passed on - confirm this is intended.
        data = None
    # Delegate to `h` itself (as h3 does) rather than to its legacy alias
    # `histogramdd`, which is created by deprecation_alias.
    return h(data, bins, *args, dim=2, **kwargs)
def h3(data, *args, **kwargs) -> HistogramND:
    """Facade function to create 3D histograms.

    Parameters
    ----------
    data : array_like or list[array_like] or tuple[array_like]
        Can be a single array (with three columns) or three different arrays
        (for each component)
    *args, **kwargs
        Passed on to h. When the components are given separately and
        "axis_names" is not specified, the ``name`` attribute of each
        component (or None where missing) is used.

    See Also
    --------
    h
    """
    if (
        data is not None
        and isinstance(data, (list, tuple))
        and not np.isscalar(data[0])
    ):
        if "axis_names" not in kwargs:
            # Must be read before the components are converted to plain arrays.
            kwargs["axis_names"] = [getattr(column, "name", None) for column in data]
        # Convert each component first: plain lists/tuples do not support
        # `[:, np.newaxis]` indexing (NOTE(review): recent pandas versions
        # reportedly reject it for Series as well - confirm).
        data = np.concatenate(
            [np.asarray(item)[:, np.newaxis] for item in data], axis=1
        )
    else:
        kwargs["dim"] = 3
    return h(data, *args, **kwargs)
def h(
    data,
    bins=10,
    *args,
    adaptive=False,
    dropna=True,
    name: Optional[str] = None,
    title: Optional[str] = None,
    axis_names=None,
    dim: Optional[int] = None,
    weights=None,
    **kwargs
) -> HistogramND:
    """Facade function to create n-dimensional histograms.

    3D variant of this function is also aliased as "h3".

    Parameters
    ----------
    data : array_like
        Container of all the values, convertible to an array of shape (n, d).
        If None, `dim` has to be given and an array of shape (0, dim) is used.
    bins: Any
        Passed on to calculate_bins_nd.
    weights: array_like, optional
        (as numpy.histogram)
    dropna: bool
        whether to clear data from nan's before histogramming
        (rows containing any nan are dropped)
    name: str
        name of the histogram
    title: str
        title of the histogram
    axis_names: Iterable[str]
        names of the variable on x axis; if not given and data has
        a ``columns`` attribute, the column labels are used
    adaptive:
        whether the bins should be updated when new non-fitting value are filled
    dtype: Optional[type]
        Underlying type for the histogram.
        If weights are specified, default is float. Otherwise int64
    dim: int
        Dimension - necessary if you are creating an empty adaptive histogram
    **kwargs
        Passed on both to calculate_bins_nd and to from_calculate_frequencies.

    Returns
    -------
    HistogramND
        Histogram2D if the dimension is 2.

    Raises
    ------
    ValueError
        If data is not two-dimensional, if `dim` disagrees with the number
        of columns in data, or if neither data nor `dim` is given.

    See Also
    --------
    numpy.histogramdd
    """
    # pandas - guess axis names
    if not axis_names:
        if hasattr(data, "columns"):
            try:
                axis_names = tuple(data.columns)
            except Exception:
                # Perhaps columns has different meaning here. Best effort only,
                # but KeyboardInterrupt / SystemExit must not be swallowed.
                pass

    # Prepare and check data
    # Convert to array
    if data is not None:
        data = np.asarray(data)
        if data.ndim != 2:
            raise ValueError(
                "Array must have shape (n, d), {0} encountered".format(data.shape)
            )
        if dim is not None and dim != data.shape[1]:
            raise ValueError("Dimension mismatch: {0}!={1}".format(dim, data.shape[1]))
        _, dim = data.shape
        if dropna:
            # Keep only the rows without any nan
            data = data[~np.isnan(data).any(axis=1)]
        check_nan = not dropna
    else:
        if dim is None:
            raise ValueError("You have to specify either data or its dimension.")
        data = np.zeros((0, dim))
        check_nan = False

    # Prepare bins
    bin_schemas = calculate_bins_nd(
        data, bins, *args, dim=dim, check_nan=check_nan, adaptive=adaptive, **kwargs
    )

    # Prepare remaining data
    klass = Histogram2D if dim == 2 else HistogramND
    if name:
        kwargs["name"] = name
    if title:
        kwargs["title"] = title
    if axis_names:
        kwargs["axis_names"] = axis_names
    return klass.from_calculate_frequencies(
        data, binnings=bin_schemas, weights=weights, **kwargs
    )
# Aliases
# Legacy names for h1, h2 and h, created with deprecation_alias.
# NOTE(review): presumably each alias warns when called - confirm in physt.util.
histogram = deprecation_alias(h1, "histogram")
histogram2d = deprecation_alias(h2, "histogram2d")
histogramdd = deprecation_alias(h, "histogramdd")
def collection(data, bins=10, *args, **kwargs) -> HistogramCollection:
    """Create histogram collection with shared binning.

    Parameters
    ----------
    data
        Input passed to HistogramCollection.multi_h1. If it has a ``columns``
        attribute, it is first turned into a dict mapping each column label
        to ``data[label]``.
    bins, *args, **kwargs
        Passed on to HistogramCollection.multi_h1 unchanged.
    """
    if not hasattr(data, "columns"):
        return HistogramCollection.multi_h1(data, bins, *args, **kwargs)

    # Column-oriented input: split it into one entry per column label
    per_column = {}
    for label in data.columns:
        per_column[label] = data[label]
    return HistogramCollection.multi_h1(per_column, bins, *args, **kwargs)
# Names exported by `from ... import *`.
__all__ = [
    # `h` is the n-dimensional facade behind the legacy name "histogramdd";
    # it is exported alongside h1, h2 and h3.
    "h",
    "h1",
    "h2",
    "h3",
    "histogram",
    "histogram2d",
    "histogramdd",
    "collection",
    "polar",
    "azimuthal",
    "radial",
    "cylindrical",
    "cylindrical_surface",
    "spherical",
    "spherical_surface",
]