In [23]:
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen

import numpy as np
import pandas as pd
import os, sys
import requests

sys.path.insert(0, '..')

from transformers.data_container import *
from transformers.transformers import TimeSeriesTransformer, MeanSeriesTransformer, TimeSeriesWindowTransformer

In [5]:
# Read the FordA time series from a remote zip archive.
FORDA_URL = "http://timeseriesclassification.com/Downloads/FordA.zip"
FORDA_MEMBER = 'FordA/FordA.csv'
# The first 505 lines of the CSV come before the series data, so they are skipped.
FORDA_HEADER_LINES = 505

# Context managers guarantee that the HTTP response and the archive handles
# are closed even if the download or the extraction fails part-way.
with urlopen(FORDA_URL) as response:
    archive_bytes = BytesIO(response.read())
with ZipFile(archive_bytes) as archive, archive.open(FORDA_MEMBER) as csv_file:
    raw_rows = [raw.decode('utf-8') for raw in csv_file.readlines()]

# Every remaining line is one time series in comma-separated format;
# parse each of them into a list of floats.
lines = [
    [float(value) for value in row.split(',')]
    for row in raw_rows[FORDA_HEADER_LINES:]
]

In [7]:
# Peek at the first ten parsed values of the first series (still a plain list here).
lines[0][:10]

[1.1871,
 0.4096,
 -0.43154,
 -1.231,
 -1.9055,
 -2.3824,
 -2.588,
 -2.5018,
 -2.1353,
 -1.574]

In [8]:
# Wrap every parsed row in its own pd.Series, so `lines` becomes a list of Series.
lines = list(map(pd.Series, lines))

In [11]:
# Show the first ten values of the first series.
lines[0].head(10)

0    1.18710
1    0.40960
2   -0.43154
3   -1.23100
4   -1.90550
5   -2.38240
6   -2.58800
7   -2.50180
8   -2.13530
9   -1.57400
dtype: float64

Create a MultiSeries: a series whose elements are pd.Series objects.

In [12]:
# Wrap the list of pd.Series in a MultiSeries container
# (presumably provided by the star import from transformers.data_container — confirm).
X = MultiSeries(lines)

In [15]:
# Preview the first entries of the container; each entry is itself a pd.Series.
X.head()

0    0      1.187100
1      0.409600
2     -0.43154...
1    0      0.094261
1      0.310310
2      0.53060...
2    0     -1.157000
1     -1.592600
2     -1.50960...
3    0      0.356960
1      0.300850
2      0.24314...
4    0      0.307980
1      0.370350
2      0.26015...
dtype: object
data_type: <class 'pandas.core.series.Series'>

X is now a Series of pd.Series objects: every element of this Series is itself a pd.Series.
Let's try some transformers on it.
The first transformer is a window transformer. It calculates the rolling mean with a given window size (or the default one) and returns a new series.

In [16]:
# Build the window transformer with a window of 5 samples.
tr = TimeSeriesWindowTransformer(windows_size=5)
# fit() is called without data; presumably this transformer has nothing to learn — TODO confirm.
tr.fit()
# Apply the transformer to the series held in X. Note that `tr` and
# `transformed_series` are rebound by later cells in this notebook.
transformed_series = tr.transform(X)

In [17]:
# Preview the first entries of the transformed container.
transformed_series.head()

0    0           NaN
1           NaN
2           Na...
1    0           NaN
1           NaN
2           Na...
2    0           NaN
1           NaN
2           Na...
3    0           NaN
1           NaN
2           Na...
4    0           NaN
1           NaN
2           Na...
dtype: object
data_type: <class 'pandas.core.series.Series'>

Of course, with `windows_size=5` the first 4 elements are NaN.

In [18]:
# Show the first ten values of the first transformed series.
transformed_series[0].head(10)

0         NaN
1         NaN
2         NaN
3         NaN
4   -0.394268
5   -1.108168
6   -1.707688
7   -2.121740
8   -2.302600
9   -2.236300
dtype: float64

Let's try another transformer, one that extracts the mean and std from each series and transforms the series into a DataFrame.

In [19]:
# Turn every series in X into a row of summary features.
tr = TimeSeriesTransformer()
# Pass X to fit() so this call matches the other TimeSeriesTransformer cell
# in this notebook, which uses fit(X), and the usual fit(X)/transform(X) pairing.
tr.fit(X)
transformed_series = tr.transform(X)

In [20]:
# Check which container type transform() returned.
type(transformed_series)

transformers.data_container.data_container.MultiDataFrame

And the last example is a transformer with parameters.

In [21]:
tr = MeanSeriesTransformer()
# fit() receives X here; presumably the transformer derives its parameters from the data — TODO confirm.
tr.fit(X)
# Rebinds `transformed_series` to the output of this transformer.
transformed_series = tr.transform(X)

In [22]:
# Preview the first ten transformed values.
transformed_series.head(10)

0    0.002049
1    0.002051
2   -0.001942
3   -0.001943
4   -0.001941
5    0.002050
6    0.002051
7   -0.001943
8   -0.001942
9   -0.001943
dtype: float64
data_type: <class 'numpy.float64'>

The most frequently used transformer will be TimeSeriesTransformer. It extracts ~10 of the most popular quantitative features from a time series.

In [24]:
# Same transformer class as used earlier in this notebook;
# `tr` and `transformed_series` are rebound here.
tr = TimeSeriesTransformer()
tr.fit(X)
# The result is displayed below as a table of per-series statistics
# (max, mean, median, min, quantiles, std).
transformed_series = tr.transform(X)

In [28]:
# Preview the first rows of the extracted feature table.
transformed_series.head()

Unnamed: 0,max,mean,median,min,quantile_25,quantile_75,quantile_90,quantile_95,std
0,2.5263,0.001995,0.011186,-2.7875,-0.73635,0.74192,1.2534,1.5463,0.999998
1,2.6291,0.001997,-0.024726,-2.4357,-0.67411,0.65808,1.3478,1.6595,0.999997
2,2.6072,-0.001996,0.060685,-3.0132,-0.67588,0.70123,1.2591,1.5184,1.0
3,2.6431,-0.001997,-0.022668,-2.7275,-0.66265,0.56858,1.4102,1.8094,0.999997
4,3.2398,-0.001995,-0.048518,-3.0085,-0.70775,0.64898,1.254,1.6699,1.000001
