Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up use cases, add new tabular prediction on MIMICIV #510

Merged
merged 3 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions cyclops/process/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
timestamp_col: str,
time_by: Union[str, List[str]],
agg_by: Union[str, List[str]],
timestep_size: int,
timestep_size: Optional[int] = None,
window_duration: Optional[int] = None,
imputer: Optional[AggregatedImputer] = None,
agg_meta_for: Optional[List[str]] = None,
Expand All @@ -78,9 +78,9 @@
self.timestamp_col = timestamp_col
self.time_by = to_list(time_by)
self.agg_by = to_list(agg_by)
self.agg_meta_for = to_list_optional(agg_meta_for)
self.timestep_size = timestep_size
self.window_duration = window_duration
self.agg_meta_for = to_list_optional(agg_meta_for)
self.window_times = pd.DataFrame() # Calculated when given the data
self.imputer = imputer
# Parameter checking
Expand All @@ -90,8 +90,8 @@
raise ValueError(
"Cannot compute meta for a column not being aggregated.",
)
if self.window_duration is not None:
divided = self.window_duration / self.timestep_size
if window_duration is not None and timestep_size is not None:
divided = window_duration / timestep_size
if divided != int(divided):
raise ValueError("Window duration be divisible by bucket size.")

Expand Down Expand Up @@ -568,6 +568,10 @@
raise NotImplementedError(
"Cannot currently vectorize data aggregated with no window duration.",
)
if self.timestep_size is None:
raise NotImplementedError(

Check warning on line 572 in cyclops/process/aggregate.py

View check run for this annotation

Codecov / codecov/patch

cyclops/process/aggregate.py#L572

Added line #L572 was not covered by tests
"Cannot currently vectorize data aggregated with no timestep size.",
)
num_timesteps = int(self.window_duration / self.timestep_size)
# Parameter checking
has_columns(aggregated, list(self.aggfuncs.keys()), raise_error=True)
Expand Down Expand Up @@ -605,8 +609,6 @@
data: pd.DataFrame,
window_start_time: Optional[pd.DataFrame] = None,
window_stop_time: Optional[pd.DataFrame] = None,
start_bound_func: Optional[Callable[[pd.Series], pd.Series]] = None,
stop_bound_func: Optional[Callable[[pd.Series], pd.Series]] = None,
) -> pd.DataFrame:
"""Aggregate temporal values.

Expand All @@ -622,10 +624,6 @@
window_stop_time: pd.DataFrame, optional
An optionally provided window stop time. This cannot be provided if
window_duration was set.
start_bound_func : Optional[Callable[[pd.Series], pd.Series]], optional
A function to bound the start timestamp values, by default None
stop_bound_func : Optional[Callable[[pd.Series], pd.Series]], optional
A function to bound the stop timestamp values, by default None

Returns
-------
Expand All @@ -648,9 +646,6 @@
)
# Restrict the data according to the start/stop
data = self._restrict_by_timestamp(data)
# Filter the data based on bounds on start/stop
data = start_bound_func(data) if start_bound_func else data
data = stop_bound_func(data) if stop_bound_func else data
grouped = data.groupby(self.agg_by, sort=False)

return grouped.agg(self.aggfuncs)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/tutorials.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ Tutorials

.. toctree::

tutorials_monitor
tutorials_use_cases
tutorials_monitor
Loading
Loading