Skip to content

Commit

Permalink
added mp.pool in _generate_samples (#150)
Browse files Browse the repository at this point in the history
# Pull Request Template

## Description

Added an `n_jobs` parameter (default `1`) to `_generate_samples`. When `n_jobs`
is not 1, bootstrap generation is parallelized with `multiprocessing.Pool`.
closes #149 

## Type of change

Please delete options that are not relevant.

- [x] New feature (non-breaking change which adds functionality)
- [x] This change requires a documentation update

## Checklist:

- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my own code
- [x] I have commented my code, particularly in hard-to-understand
areas
- [x] I have made corresponding changes to the documentation
  • Loading branch information
astrogilda committed May 1, 2024
1 parent 6617cad commit 176399b
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 24 deletions.
57 changes: 42 additions & 15 deletions src/tsbootstrap/base_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import inspect
from collections.abc import Callable
from multiprocessing import Pool
from numbers import Integral
from typing import Optional

Expand Down Expand Up @@ -153,33 +154,59 @@ def _generate_samples(
X: np.ndarray,
return_indices: bool = False,
y=None,
n_jobs: int = 1,
):
"""Generate bootstrapped samples directly.
Parameters
----------
X : array-like of shape (n_samples, n_features)
X : array-like of shape (n_timepoints, n_features)
The input samples.
return_indices : bool, default=False
If True, a second output is returned, integer locations of
index references for the bootstrap sample, in reference to original indices.
Indexed values are not necessarily identical with bootstrapped values.
y : array-like of shape (n_timepoints, n_features_exog), default=None
Exogenous time series to use in bootstrapping.
n_jobs : int, default=1
The number of jobs to run in parallel.
Yields
------
Iterator[np.ndarray]
An iterator over the bootstrapped samples.
"""
for _ in range(self.config.n_bootstraps):
indices, data = self._generate_samples_single_bootstrap(X=X, y=y)
data = np.concatenate(data, axis=0)

# hack to fix known issue with non-concatenated index sets
# see bug issue #81
if isinstance(indices, list):
indices = np.concatenate(indices, axis=0)

if return_indices:
yield data, indices # type: ignore
else:
yield data
if n_jobs == 1:
# Run bootstrap generation sequentially in the main process
for _ in range(self.config.n_bootstraps):
indices, data = self._generate_samples_single_bootstrap(X, y)
data = np.concatenate(data, axis=0)
if return_indices:
# hack to fix known issue with non-concatenated index sets
# see bug issue #81
if isinstance(indices, list):
indices = np.concatenate(indices, axis=0)
yield data, indices
else:
yield data
else:
# Use multiprocessing to handle bootstrapping
args = [(X, y) for _ in range(self.config.n_bootstraps)]
with Pool(n_jobs) as pool:
results = pool.starmap(
self._generate_samples_single_bootstrap, args
)

for indices, data in results:
data = np.concatenate(data, axis=0)
if return_indices:
# hack to fix known issue with non-concatenated index sets
# see bug issue #81
if isinstance(indices, list):
indices = np.concatenate(indices, axis=0)
yield data, indices
else:
yield data

def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None):
"""Generate list of bootstraps for a single bootstrap iteration."""
Expand Down
6 changes: 3 additions & 3 deletions src/tsbootstrap/block_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _generate_blocks(self, X: np.ndarray):
Parameters
----------
X : array-like of shape (n_samples, n_features)
X : array-like of shape (n_timepoints, n_features)
The input samples.
Returns
Expand Down Expand Up @@ -165,7 +165,7 @@ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None):
Parameters
----------
X : array-like of shape (n_samples, n_features)
X : array-like of shape (n_timepoints, n_features)
The input samples.
Returns
Expand Down Expand Up @@ -295,7 +295,7 @@ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None):
Parameters
----------
X : array-like of shape (n_samples, n_features)
X : array-like of shape (n_timepoints, n_features)
The input samples.
Returns
Expand Down
8 changes: 4 additions & 4 deletions src/tsbootstrap/tests/test_all_bootstraps.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ def test_bootstrap_input_output_contract(self, object_instance, scenario):
)

if not all(bs.ndim == 2 for bs in bss):
print([bs.shape for bs in bss])
raise ValueError(
f"{cls_name}.bootstrap yielded arrays with unexpected number of "
"dimensions. All bootstrap samples should have 2 dimensions."
f"{cls_name}.bootstrap yielded arrays with unexpected number of dimensions. All bootstrap samples should have 2 dimensions."
)

if not all(bs.shape[0] == n_timepoints for bs in bss):
Expand Down Expand Up @@ -218,9 +218,9 @@ def test_bootstrap_test_ratio(self, object_instance, scenario, test_ratio):
)

if not all(bs.ndim == 2 for bs in bss):
print([bs.shape for bs in bss])
raise ValueError(
f"{cls_name}.bootstrap yielded arrays with unexpected number of "
"dimensions. All bootstrap samples should have 2 dimensions."
f"{cls_name}.bootstrap yielded arrays with unexpected number of dimensions. All bootstrap samples should have 2 dimensions."
)

if not all(bs.shape[0] == expected_length for bs in bss):
Expand Down
6 changes: 4 additions & 2 deletions src/tsbootstrap/tsfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,13 @@ def set_params(self, **params):
self.model_params[key] = value
return self

'''
def __repr__(self):
"""
Official string representation of a TSFit object.
"""
return f"TSFit(order={self.order}, model_type='{self.model_type}')"
'''

def fit(self, X: np.ndarray, y=None) -> TSFit:
"""
Expand All @@ -247,7 +249,7 @@ def fit(self, X: np.ndarray, y=None) -> TSFit:
Parameters
----------
X : np.ndarray
Input data of shape (n_samples, n_features).
Input data of shape (n_timepoints, n_features).
y : np.ndarray, optional
Exogenous variables, by default None.
Expand Down Expand Up @@ -566,7 +568,7 @@ def predict(self, X: np.ndarray, y=None, n_steps: int = 1) -> np.ndarray:
Parameters
----------
X : np.ndarray
Input data of shape (n_samples, n_features).
Input data of shape (n_timepoints, n_features).
y : np.ndarray, optional
Exogenous variables, by default None.
n_steps : int, optional
Expand Down

0 comments on commit 176399b

Please sign in to comment.