Skip to content

Commit

Permalink
rename apply_selections to selected_datasets, docs, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
douglasdavis committed May 28, 2019
1 parent db65100 commit 2949a07
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 16 deletions.
4 changes: 2 additions & 2 deletions tests/test_data.py
Expand Up @@ -267,14 +267,14 @@ def test_columnrming():
assert r not in list_of_cols_after


def test_apply_selections():
def test_selected_datasets():
ds2 = from_root(
"tests/data/test_file.root",
auxweights=["pT_lep1", "pT_lep2", "pT_jet1"],
name="myds",
)

splits = ds2.apply_selections(
splits = ds2.selected_datasets(
{"s1": "(pT_lep2 > 30) & (pT_jet1 < 50)", "s2": "(reg2j1b==True)"}
)

Expand Down
22 changes: 9 additions & 13 deletions twaml/data.py
Expand Up @@ -163,7 +163,7 @@ def shape(self, new) -> None:
raise NotImplementedError("Cannot set shape manually")

@property
def wtloop_metas(self) -> Optional[Dict[str, Dict]]:
def wtloop_metas(self) -> Optional[Dict[str, Dict[str, Any]]]:
"""dictionary of metadata information (one for each file making up the dataset)"""
return self._wtloop_metas

Expand Down Expand Up @@ -543,31 +543,27 @@ def selection_masks(self, selections: Dict[str, str]) -> Dict[str, np.ndarray]:
selections:
Dictionary of selections in the form ``{ name : selection }``.
Returns
-------
Dict[str, np.ndarray]
A dictionary of ``{ selection name : bool array }`` satisfying the selections
"""
masks = {}
for sel_key, sel_val in selections.items():
masks[sel_key] = np.asarray(self.df.eval(sel_val))
return masks

def apply_selections(self, selections: Dict[str, str]) -> Dict[str, "dataset"]:
def selected_datasets(self, selections: Dict[str, str]) -> Dict[str, "dataset"]:
"""Based on a dictionary of selections, break the dataset into a set
of multiple (finer grained) datasets.
Warnings
--------
For large datasets this can get memory intensive quickly. A
good alternative is :meth:`selection_masks` combined with the
``__getitem__`` implementation.
Parameters
----------
selections:
Dictionary of selections in the form ``{ name : selection }``.
Returns
-------
Dict[str, dataset]
A dictionary of datasets satisfying the selections
Examples
--------
Expand All @@ -578,7 +574,7 @@ def apply_selections(self, selections: Dict[str, str]) -> Dict[str, "dataset"]:
... '2j1b' : '(reg2j1b == True) & (OS == True) & (elmu == True)',
... '2j2b' : '(reg2j2b == True) & (OS == True) & (elmu == True)',
... '3j1b' : '(reg3j1b == True) & (OS == True) & (elmu == True)'}
>>> selected_datasets = ds.apply_selections(selections)
>>> selected_datasets = ds.selected_datasets(selections)
"""
breaks = {}
Expand Down
2 changes: 1 addition & 1 deletion twaml/version.py
@@ -1,6 +1,6 @@
import re

__version__ = "0.9.3"
__version__ = "0.10.0"
version = __version__
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down

0 comments on commit 2949a07

Please sign in to comment.