Skip to content

Commit

Permalink
Merge b77e52b into e0a18f2
Browse files Browse the repository at this point in the history
  • Loading branch information
qtux committed Dec 7, 2018
2 parents e0a18f2 + b77e52b commit d0bd406
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 2 deletions.
48 changes: 48 additions & 0 deletions examples/plot_function_xy_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
'''
====================================
Simple FunctionXYTransformer Example
====================================
This example demonstrates how to execute arbitrary functions on time series data using the
FunctionXYTransformer.
'''

# Author: Matthias Gazzari
# License: BSD

from seglearn.transform import FunctionXYTransformer, SegmentXY
from seglearn.base import TS_Data

import numpy as np

def choose_cols(Xt, yt, cols):
    '''
    Select a subset of columns from every time series.

    Parameters
    ----------
    Xt : iterable of 2-D arrays
        Time series data; each element is indexed as ``time_series[:, cols]``
    yt : object
        Target, returned unchanged
    cols : index, slice or list of indices
        Columns to keep in every time series

    Returns
    -------
    Xt : list
        One array per input series, restricted to the selected columns
    yt : object
        The target that was passed in (same object)
    '''
    return [time_series[:, cols] for time_series in Xt], yt

def invert_y(Xt, yt):
    '''
    Logically negate the target while leaving the time series data untouched.

    Parameters
    ----------
    Xt : object
        Time series data, returned unchanged
    yt : array-like
        Target values; negated element-wise with ``np.logical_not``

    Returns
    -------
    Xt : object
        The data that was passed in (same object)
    yt : array-like of bool
        Element-wise logical negation of the input target
    '''
    return Xt, np.logical_not(yt)

# Two multivariate time series (4 and 3 samples respectively) with 3 variables each
X = [
np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]),
np.array([[30, 40, 50], [60, 70, 80], [90, 100, 110]]),
]
# Time series targets: one boolean per sample of the corresponding series in X
y = [
np.array([True, False, False, True]),
np.array([False, True, False]),
]

# Step 1: keep only columns 0 and 1 of every series (choose_cols returns y unchanged)
trans = FunctionXYTransformer(choose_cols, func_kwargs={"cols":[0,1]})
X, y, _ = trans.fit_transform(X, y)

# Step 2: segment the reduced series with SegmentXY (width=3, overlap=1);
# X and y are rebound to the segmented output
segment = SegmentXY(width=3, overlap=1)
X, y, _ = segment.fit_transform(X, y)

print("X:", X)
print("y: ", y)

# Step 3: negate the segmented target with np.logical_not via invert_y (X is passed through)
trans = FunctionXYTransformer(invert_y)
X, y, _ = trans.fit_transform(X, y)

print("~y: ", y)
3 changes: 2 additions & 1 deletion seglearn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
__all__ = ['TS_Data', 'FeatureRep', 'FeatureRepMix', 'PadTrunc', 'Interp', 'Pype', 'SegmentX',
'SegmentXY', 'SegmentXYForecast', 'TemporalKFold', 'temporal_split', 'check_ts_data',
'check_ts_data_with_ts_target', 'ts_stats', 'get_ts_data_parts', 'all_features',
'base_features', 'load_watch', 'TargetRunLengthEncoder', '__version__']
'base_features', 'load_watch', 'TargetRunLengthEncoder', 'FunctionXYTransformer',
'__version__']

__author__ = 'David Burns david.mo.burns@gmail.com'
94 changes: 94 additions & 0 deletions seglearn/tests/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Author: David Burns
# License: BSD

import pytest

import numpy as np

import seglearn.transform as transform
Expand Down Expand Up @@ -447,3 +449,95 @@ def test_feature_rep_mix():
Xt = uni_union.transform(X)
assert Xt.shape[0] == len(X)
assert len(uni_union.f_labels) == Xt.shape[1]


def test_function_xy_transform():
    '''
    Exercise FunctionXYTransformer as an identity transform, with a custom function and
    keyword arguments, and with sample-count-changing (resampling) functions.
    '''
    constant = 10
    identity = transform.FunctionXYTransformer()

    def replace(Xt, yt, value):
        # overwrite every entry of the data and the target with a constant
        return np.ones(Xt.shape) * value, np.ones(yt.shape) * value

    custom = transform.FunctionXYTransformer(replace, func_kwargs={"value": constant})

    def check(X, y, ts_shape, Xc=None):
        # Without a function the transformer must hand back the very same objects, so
        # identity (``is``) is asserted for every input kind, TS_Data included.
        identity.fit(X, y)
        Xtrans, ytrans, _ = identity.transform(X, y)
        assert Xtrans is X
        assert ytrans is y

        custom.fit(X, y)
        Xtrans, ytrans, _ = custom.transform(X, y)
        if Xc is None:
            Xtt = Xtrans
        else:
            # contextual data must be carried through untouched
            Xtt, Xtc = get_ts_data_parts(Xtrans)
            assert Xtc is Xc
        assert np.array_equal(Xtt, np.ones(ts_shape) * constant)
        assert np.array_equal(ytrans, np.ones(y.shape) * constant)

    # univariate ts
    X = np.random.rand(100, 10)
    y = np.ones(100)
    check(X, y, X.shape)

    # multivariate ts
    X = np.random.rand(100, 10, 4)
    y = np.ones(100)
    check(X, y, X.shape)

    # ts with univariate contextual data
    Xt = np.random.rand(100, 10, 4)
    Xc = np.random.rand(100)
    y = np.ones(100)
    check(TS_Data(Xt, Xc), y, Xt.shape, Xc)

    # ts with multivariate contextual data
    Xt = np.random.rand(100, 10, 4)
    Xc = np.random.rand(100, 3)
    y = np.ones(100)
    check(TS_Data(Xt, Xc), y, Xt.shape, Xc)

    # test resampling
    def resample(Xt, yt):
        # collapse everything into a single sample, changing the number of samples
        return Xt.reshape(1, -1), yt.reshape(1, -1)

    illegal_resampler = transform.FunctionXYTransformer(resample)
    permitted_resampler = transform.FunctionXYTransformer(resample, disable_resample=False)

    X = np.random.rand(100, 10)
    y = np.ones(100)

    # resampling is rejected by default
    illegal_resampler.fit(X, y)
    with pytest.raises(ValueError):
        illegal_resampler.transform(X, y)

    # and accepted once explicitly enabled
    permitted_resampler.fit(X, y)
    Xtrans, ytrans, _ = permitted_resampler.transform(X, y)
    assert len(Xtrans) == 1
    assert len(ytrans) == 1
88 changes: 87 additions & 1 deletion seglearn/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .util import get_ts_data_parts, check_ts_data

__all__ = ['SegmentX', 'SegmentXY', 'SegmentXYForecast', 'PadTrunc', 'Interp', 'FeatureRep',
'FeatureRepMix']
'FeatureRepMix', 'FunctionXYTransformer']


class XyTransformerMixin(object):
Expand Down Expand Up @@ -1141,3 +1141,89 @@ def transform(self, X):
fts = np.column_stack([fts, Xc])

return fts

class FunctionXYTransformer(BaseEstimator, XyTransformerMixin):
    '''
    Transformer for applying a custom function on datasets where X is time series data, optionally
    with contextual variables and y is also time series data with the same sampling interval as X.

    The function is called as ``func(Xt, y, **func_kwargs)`` where ``Xt`` is the time series part
    of X, and must return a tuple ``(Xt_new, y_new)``. Contextual data is never passed to the
    function; it is re-attached unchanged to the transformed time series.

    Parameters
    ----------
    func : function, optional (default=None)
        the function to be applied on Xt and yt (Xt and yt are unaltered if no function is provided)
    func_kwargs : dictionary, optional (default=None)
        keyword arguments to be passed to the function call (None means no keyword arguments)
    disable_resample : bool, optional (default=True)
        whether or not to forbid resampling operations (i.e. functions that change the number of
        samples) - WARNING: setting this to False might be dangerous as this transform will be
        applied on training and test data
    '''

    def __init__(self, func=None, func_kwargs=None, disable_resample=True):
        # None instead of a ``{}`` default: a mutable default would be one dict object shared by
        # every instance created without explicit func_kwargs
        self.func = func
        self.func_kwargs = func_kwargs
        self.disable_resample = disable_resample

    def fit(self, X, y=None):
        '''
        Fit the transform (validates the input only, nothing is learned)

        Parameters
        ----------
        X : array-like, shape [n_series, ...]
            Time series data and (optionally) contextual data
        y : None
            There is no need of a target in a transformer, yet the pipeline API requires this
            parameter.

        Returns
        -------
        self : object
            Returns self.
        '''
        check_ts_data(X, y)
        return self

    def transform(self, X, y, sample_weight=None):
        '''
        Transforms the time series data and the target vector based on the provided function.
        Note this transformation may change the number of samples in the data, but only if
        ``disable_resample`` is False.
        Currently sample weights always returned as None.

        Parameters
        ----------
        X : array-like, shape [n_series, ...]
            Time series data and (optionally) contextual data
        y : array-like shape [n_series]
            target vector
        sample_weight : array-like shape [n_series], default = None
            sample weights (ignored)

        Returns
        -------
        Xt : array-like
            transformed time series data (with the original contextual data re-attached, if any)
        yt : array-like
            transformed target vector
        sample_weight_new : None

        Raises
        ------
        ValueError
            If ``disable_resample`` is True and the function changed the number of samples in
            the time series data or in the target.
        '''
        check_ts_data(X, y)

        if self.func is None:
            # identity: hand back the very same objects
            return X, y, None

        kwargs = {} if self.func_kwargs is None else self.func_kwargs
        Xt, Xc = get_ts_data_parts(X)
        n_samples = len(Xt)
        Xt, yt = self.func(Xt, y, **kwargs)
        if self.disable_resample and (len(Xt) != n_samples or len(yt) != n_samples):
            raise ValueError("Changing the number of samples inside a FunctionXYTransformer is "
                             "disabled.")
        if Xc is not None:
            # Xc was not passed through func, so it keeps its original number of samples
            Xt = TS_Data(Xt, Xc)
        return Xt, yt, None

0 comments on commit d0bd406

Please sign in to comment.