Skip to content

Commit

Permalink
abandoning approach
Browse files Browse the repository at this point in the history
  • Loading branch information
dmbee committed May 29, 2019
1 parent c23914f commit 0a421fc
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 59 deletions.
2 changes: 1 addition & 1 deletion seglearn/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.8"
__version__ = "2.0.0"
3 changes: 2 additions & 1 deletion seglearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def __getitem__(self, indices):
timestamps = self.timestamps[indices]
sernum = self.sernum[indices]

return TS_Data(ts_data, context_data, timestamps, sernum)
# return TS_Data(ts_data, context_data, timestamps, sernum)
return ts_data

def __next__(self):
if self.index == self.N:
Expand Down
60 changes: 30 additions & 30 deletions seglearn/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,36 @@
from seglearn.base import TS_Data


def test_ts_data():
    '''
    TS_Data should expose the arrays it was built from, and integer / slice indexing
    should return a TS_Data holding the matching entries of both parts
    '''
    # time series data: three series of different lengths, one context row per series
    series = np.array([np.random.rand(100, 10), np.random.rand(200, 10), np.random.rand(20, 10)])
    context = np.random.rand(3, 10)
    container = TS_Data(series, context)

    assert np.array_equal(container.context_data, context)
    assert np.array_equal(container.ts_data, series)

    assert isinstance(container[1], TS_Data)
    assert np.array_equal(container[1].ts_data, series[1])
    assert np.array_equal(container[1].context_data, context[1])

    # segmented time series data: 3-D segments with 2-D context first,
    # then 2-D segments with 1-D context
    window = slice(4, 10)
    for ts_shape, context_shape in [((100, 10, 6), (100, 6)), ((100, 10), (100,))]:
        segments = np.random.rand(*ts_shape)
        context = np.random.rand(*context_shape)

        container = TS_Data(segments, context)
        assert isinstance(container[window], TS_Data)
        assert np.array_equal(container[window].ts_data, segments[window])
        assert np.array_equal(container[window].context_data, context[window])
# def test_ts_data():
# # time series data
# ts = np.array([np.random.rand(100, 10), np.random.rand(200, 10), np.random.rand(20, 10)])
# c = np.random.rand(3, 10)
# data = TS_Data(ts, c)
#
# assert np.array_equal(data.context_data, c)
# assert np.array_equal(data.ts_data, ts)
#
# assert isinstance(data[1], TS_Data)
# assert np.array_equal(data[1].ts_data, ts[1])
# assert np.array_equal(data[1].context_data, c[1])
#
# # segmented time series data
#
# sts = np.random.rand(100, 10, 6)
# c = np.random.rand(100, 6)
#
# data = TS_Data(sts, c)
# assert isinstance(data[4:10], TS_Data)
# assert np.array_equal(data[4:10].ts_data, sts[4:10])
# assert np.array_equal(data[4:10].context_data, c[4:10])
#
# sts = np.random.rand(100, 10)
# c = np.random.rand(100)
#
# data = TS_Data(sts, c)
# assert isinstance(data[4:10], TS_Data)
# assert np.array_equal(data[4:10].ts_data, sts[4:10])
# assert np.array_equal(data[4:10].context_data, c[4:10])


def test_watch():
Expand Down
93 changes: 67 additions & 26 deletions seglearn/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from .base import TS_Data
from .feature_functions import base_features
from .util import get_ts_data_parts, check_ts_data
from .util import get_ts_data_parts, check_ts_data, get_ts_parts

__all__ = ['SegmentX', 'SegmentXY', 'SegmentXYForecast', 'PadTrunc', 'InterpLongToWide', 'Interp',
'FeatureRep', 'FeatureRepMix', 'FunctionTransformer']
Expand Down Expand Up @@ -199,7 +199,8 @@ def transform(self, X, y=None, sample_weight=None):
expanded sample weights
'''
check_ts_data(X, y)
Xt, Xc = get_ts_data_parts(X)

Xt, Xc, ts, sn = get_ts_parts(X)
yt = y
swt = sample_weight

Expand All @@ -212,19 +213,31 @@ def transform(self, X, y=None, sample_weight=None):
Xt = np.array([sliding_window(Xt[i], self.width, self._step, self.order)
for i in np.arange(N)])

Nt = [len(Xt[i]) for i in np.arange(len(Xt))]
Nt = [len(Xt[i]) for i in np.arange(N)] # how many segments in each series

if sn is not None:
sn = [np.full(Nt[i], sn[i]) for i in np.arange(N)]

if Xc is not None:
Xc = expand_variables_to_segments(Xc, Nt)

if ts is not None:
ts = np.array([sliding_window(ts[i], self.width, self._step, self.order)
for i in np.arange(N)])
ts = np.concatenate(ts)
ts = middle(ts)

Xt = np.concatenate(Xt)

if isinstance(X, TS_Data):
Xt = TS_Data(Xt, Xc, ts, sn)

if yt is not None:
yt = expand_variables_to_segments(yt, Nt).ravel()

if swt is not None:
swt = expand_variables_to_segments(swt, Nt).ravel()

if Xc is not None:
Xc = expand_variables_to_segments(Xc, Nt)
Xt = TS_Data(Xt, Xc)

if self.shuffle is True:
check_random_state(self.random_state)
return shuffle_data(Xt, yt, swt)
Expand Down Expand Up @@ -353,7 +366,7 @@ def transform(self, X, y=None, sample_weight=None):
'''
check_ts_data(X, y)
Xt, Xc = get_ts_data_parts(X)
Xt, Xc, ts, sn = get_ts_parts(X)
yt = y

N = len(Xt) # number of time series
Expand All @@ -366,11 +379,23 @@ def transform(self, X, y=None, sample_weight=None):
for i in np.arange(N)])

Nt = [len(Xt[i]) for i in np.arange(len(Xt))]
Xt = np.concatenate(Xt)

if sn is not None:
sn = [np.full(Nt[i], sn[i]) for i in np.arange(N)]

if Xc is not None:
Xc = expand_variables_to_segments(Xc, Nt)
Xt = TS_Data(Xt, Xc)

if ts is not None:
ts = np.array([sliding_window(ts[i], self.width, self._step, self.order)
for i in np.arange(N)])
ts = np.concatenate(ts)
ts = self.y_func(ts)

Xt = np.concatenate(Xt)

if isinstance(X, TS_Data):
Xt = TS_Data(Xt, Xc, ts, sn)

if yt is not None:
yt = np.array([sliding_window(yt[i], self.width, self._step, self.order)
Expand Down Expand Up @@ -506,13 +531,9 @@ def transform(self, X, y, sample_weight=None):
'''
check_ts_data(X, y)
Xt, Xc = get_ts_data_parts(X)
Xt, Xc, ts, sn = get_ts_parts(X)
yt = y

# if only one time series is learned
if len(Xt[0]) == 1:
Xt = [Xt]

N = len(Xt) # number of time series

if Xt[0].ndim > 1:
Expand All @@ -523,14 +544,26 @@ def transform(self, X, y, sample_weight=None):
for i in np.arange(N)])

Nt = [len(Xt[i]) for i in np.arange(len(Xt))]
Xt = np.concatenate(Xt)

# todo: implement advance X
Xt = Xt[:, 0:self.width]
if sn is not None:
sn = [np.full(Nt[i], sn[i]) for i in np.arange(N)]

if Xc is not None:
Xc = expand_variables_to_segments(Xc, Nt)
Xt = TS_Data(Xt, Xc)

if ts is not None:
ts = np.array([sliding_window(ts[i], self.width + self.forecast, self._step, self.order)
for i in np.arange(N)])
ts = np.concatenate(ts)
ts = ts[:, self.width:(self.width + self.forecast)] # target y
ts = self.y_func(ts)

Xt = np.concatenate(Xt)
# todo: implement advance X
Xt = Xt[:, 0:self.width]

if isinstance(X, TS_Data):
Xt = TS_Data(Xt, Xc, ts, sn)

if yt is not None:
yt = np.array([sliding_window(yt[i], self.width + self.forecast, self._step, self.order)
Expand Down Expand Up @@ -676,23 +709,26 @@ def transform(self, X, y=None, sample_weight=None):
'''
check_ts_data(X, y)
Xt, Xc = get_ts_data_parts(X)
Xt, Xc, ts, sn = get_ts_parts(X)
yt = y
swt = sample_weight

Xt = self._mv_resize(Xt)

if Xc is not None:
Xt = TS_Data(Xt, Xc)
if ts is not None:
ts = self._mv_resize(ts)

if yt is not None and len(np.atleast_1d(yt[0])) > 1:
if yt is not None and len(np.atleast_1d(yt[0])) > 1: # todo: this will fail for one-hot y or y strings
# y is a time series
yt = self._mv_resize(yt)
swt = None
elif yt is not None:
# todo: is this needed?
yt = np.array(yt)

if isinstance(X, TS_Data):
Xt = TS_Data(Xt, Xc, ts, sn)

return Xt, yt, swt


Expand Down Expand Up @@ -791,7 +827,7 @@ def transform(self, X, y=None, sample_weight=None):
D = Xt[0].shape[1] - 1 # number of data channels

# 1st channel is time
t = [Xt[i][:, 0] for i in np.arange(N)]
t = [Xt[i][:, 0] for i in np.arange(N)] # todo: redo this
t_lin = [np.arange(Xt[i][0, 0], Xt[i][-1, 0], self.sample_period) for i in np.arange(N)]

if D == 1:
Expand Down Expand Up @@ -1117,12 +1153,17 @@ def transform(self, X):
'''
self._check_if_fitted()
Xt, Xc = get_ts_data_parts(X)
Xt, Xc, ts, sn = get_ts_parts(X)
check_array(Xt, dtype='numeric', ensure_2d=False, allow_nd=True)

fts = np.column_stack([self.features[f](Xt) for f in self.features])
if Xc is not None:
fts = np.column_stack([fts, Xc])

# if isinstance(X, TS_Data):
# return TS_Data(fts, None, ts, sn)
# else:
# return fts
return fts

def _reset(self):
Expand Down Expand Up @@ -1168,7 +1209,7 @@ def _generate_feature_labels(self, X):
'''
Generates string feature labels
'''
Xt, Xc = get_ts_data_parts(X)
Xt, Xc, ts, sn = get_ts_parts(X)

ftr_sizes = self._check_features(self.features, Xt[0:3])
f_labels = []
Expand Down
8 changes: 7 additions & 1 deletion seglearn/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from seglearn.base import TS_Data

__all__ = ['get_ts_data_parts', 'check_ts_data', 'check_ts_data_with_ts_target', 'ts_stats']
__all__ = ['get_ts_data_parts', 'get_ts_parts', 'check_ts_data', 'check_ts_data_with_ts_target', 'ts_stats']

def get_ts_data_parts(X):
'''
Expand All @@ -32,6 +32,12 @@ def get_ts_data_parts(X):
return X, None
return X.ts_data, X.context_data

def get_ts_parts(X):
    '''
    Splits X into time series data, context data, timestamps and series numbers

    Parameters
    ----------
    X : array-like or TS_Data
        plain time series data, or a TS_Data object

    Returns
    -------
    tuple of length 4
        (X.ts_data, X.context_data, X.timestamps, X.sernum) when X is a TS_Data object,
        otherwise (X, None, None, None)
    '''
    # anything that is not TS_Data is passed through with the extra parts left empty
    if not isinstance(X, TS_Data):
        return X, None, None, None
    return X.ts_data, X.context_data, X.timestamps, X.sernum


def check_ts_data(X, y=None):
'''
Expand Down

0 comments on commit 0a421fc

Please sign in to comment.