In [2]:
import numpy as np
import glob

# Download the five archives (Z, O, N, F, S) into ./andrzejak.
# `mkdir -p` tolerates an existing directory and `wget -nc` skips archives
# that are already present, so re-running this cell does not re-download.
!mkdir -p andrzejak
!wget -nc -P andrzejak http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/{Z,O,N,F,S}.zip
# Extract every archive into the same directory; -u only extracts new/updated
# files and -L lower-cases the extracted names (see trailing note on the line).
!unzip -uL 'andrzejak/*.zip' -d andrzejak/ # convert to lowercase; N.zip files are .TXT
# Collect the extracted text files whose names start with z, f, s, o or n.
filenames = glob.glob('andrzejak/[zfson]*.txt')
# --- Disabled code below, kept for reference only ---
# If re-enabled it would: load each text file, pair the samples with a
# linearly spaced time axis from 0 to T_MAX and a constant error of 1e-6,
# save the three columns as comma-delimited `.dat` files, write a
# `filename,class` header file (class == first letter of the file name),
# and tar up the z*/s* `.dat` files.
#values_list = [np.loadtxt(f) for f in filenames]
#T_MAX = 23.6
#for f in filenames:
#    m = np.loadtxt(f)
#    t = np.linspace(start=0, stop=T_MAX, num=len(m))
#    e = np.repeat(1e-6, len(t))
#    np.savetxt(f[:-3] + 'dat', np.vstack((t,m,e)).T, delimiter=',')
#with open('andrzejak/test_header.dat', 'w') as header:
#    header.write('filename,class\n')
#    for f in filenames:
#        short_fname = f.split('/')[1]
#        header.write("{},{}\n".format(short_fname[:-4], short_fname[0]))
#!COPYFILE_DISABLE=1 tar czf andrzejak/test_data.tar.gz andrzejak/[zs]*.dat

File 'andrzejak/Z.zip' already there; not retrieving.

File 'andrzejak/O.zip' already there; not retrieving.

File 'andrzejak/N.zip' already there; not retrieving.

File 'andrzejak/F.zip' already there; not retrieving.

File 'andrzejak/S.zip' already there; not retrieving.

Archive:  andrzejak/F.zip

Archive:  andrzejak/N.zip

Archive:  andrzejak/O.zip

Archive:  andrzejak/S.zip

Archive:  andrzejak/Z.zip

5 archives were successfully processed.


In [42]:
import glob
import os

import numpy as np

T_MAX = 23.6


def class_labels_from_filenames(paths):
    """Map data file paths to binary class labels.

    The class of a recording is the first character of its file name.
    Files starting with 'z' keep the label 'z'; every other prefix is
    merged into the single label 'onsf'.

    Parameters
    ----------
    paths : iterable of str
        File paths; only the base name of each path is inspected.

    Returns
    -------
    numpy.ndarray
        1-D array of unicode strings ('z' or 'onsf'), one per path.
    """
    # Unicode dtype keeps the comparison with the str literal 'z' elementwise
    # on Python 3; a bytes ('S') array compared against a str does not match
    # elementwise there, which relabelled every sample (including 'z').
    # The explicit dtype also keeps an empty input a string array.
    prefixes = np.array([os.path.basename(p)[0] for p in paths], dtype='U1')
    return np.where(prefixes == 'z', 'z', 'onsf')


# glob returns files in arbitrary order; sort so that the sample order (and
# therefore any index-based cross-validation split) is reproducible.
filenames = sorted(glob.glob('andrzejak/[zonsf]*.txt'))
m_list = [np.loadtxt(f) for f in filenames]
#t_list = [np.linspace(start=0, stop=T_MAX, num=len(m)) for m in m_list]
#e_list = [np.repeat(1e-5, len(m)) for m in m_list]
classes = class_labels_from_filenames(filenames)  # class == file prefix

In [43]:
from mltsp import cfg
from mltsp import featurize

# Override the library-wide default before featurizing.
# NOTE(review): presumably the recording duration used when no explicit time
# values are supplied -- confirm against the mltsp documentation.
cfg.DEFAULT_MAX_TIME = 23.6

# Only measurement values are supplied; the first and third positional
# arguments are passed as None.
fset_mltsp = featurize.featurize_time_series(
    None, m_list, None, cfg.features_list_science, classes)


def is_invalid(x):
    """Return a truthy value if `x` holds a NaN or a magnitude above 1e32."""
    return np.any(np.isnan(x)) or np.any(np.abs(x) > 1e32)


# Decide which features to discard first, then drop them one at a time.
invalid_features = [name for name in fset_mltsp.data_vars
                    if is_invalid(fset_mltsp[name].values)]
for feature in invalid_features:
    fset_mltsp = fset_mltsp.drop(feature)

In [46]:
import pywt
import scipy.stats
from mltsp import featurize

# Decompose every signal with a discrete wavelet transform ('db1', level
# n_channels-1) and compute the 5 standard features listed by
# Guo et al. (2012) for each of 5 frequency bands.
n_channels = 5
db1_wavelet = pywt.Wavelet('db1')
dwt_list = [pywt.wavedec(signal, db1_wavelet, level=n_channels - 1)
            for signal in m_list]


def mean_square(x):
    """Mean of the squared values of `x`."""
    return np.mean(x ** 2)


def total_abs_diff(x):
    """Sum of absolute differences between consecutive values of `x`."""
    return np.sum(np.abs(np.diff(x)))


# Each entry is a (function, 'm') tuple.
# NOTE(review): 'm' presumably names the measurement input in a dask-style
# task specification -- confirm against mltsp's custom_functions contract.
guo_dask = {
    'mean': (np.mean, 'm'),
    'std': (np.std, 'm'),
    'mean2': (mean_square, 'm'),
    'abs_diffs': (total_abs_diff, 'm'),
    'skew': (scipy.stats.skew, 'm')
}
fset_guo = featurize.featurize_time_series(
    None, dwt_list, None, guo_dask.keys(), classes,
    custom_functions=guo_dask)

In [47]:
import pywt
import pyeeg
from mltsp import featurize

# Same decomposition depth as before, this time with the 'db6' wavelet.
n_channels = 5
db6_wavelet = pywt.Wavelet('db6')
dwt_list = [pywt.wavedec(signal, db6_wavelet, level=n_channels - 1)
            for signal in m_list]


def approx_entropy(x):
    """Approximate entropy of `x` via pyeeg with M=2 and R=0.15*std(x)."""
    return pyeeg.ap_entropy(x, M=2, R=0.15 * np.std(x))


# The only active feature is the approximate entropy; the Lyapunov exponent
# entry is left disabled.
pyeeg_dask = {
    'ap_entropy': (approx_entropy, 'm'),
#    'lyapunov': (lambda x: pyeeg.LLE(x, 2, 4, 1, 1), 'm')
}
fset_ocak = featurize.featurize_time_series(
    None, dwt_list, None, pyeeg_dask.keys(), classes,
    custom_functions=pyeeg_dask)

In [48]:
from mltsp.build_model import build_model_from_featureset
from mltsp.predict import model_predictions
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.cross_validation import StratifiedKFold


def cv_train_test_errors(model, fset, classes, cv_inds):
    """Compute per-fold training and test misclassification rates.

    For every (train, test) index pair the model is fitted on the training
    rows of `fset`, then its predictions on the training rows and on the test
    rows are compared with the true labels.

    Parameters
    ----------
    model : estimator
        Passed to `build_model_from_featureset` and `model_predictions`.
    fset : featureset
        Object supporting ``fset.isel(name=indices)`` for row selection.
    classes : numpy.ndarray
        True labels, indexable by the train/test index arrays.
    cv_inds : iterable of (train, test)
        Cross-validation folds; any iterable (including a generator) works.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Training error and test error, one entry per fold.
    """
    # Use the folds that were passed in (previously the global `cv` was read
    # and the `cv_inds` argument was silently ignored). Materialise them so
    # that iterables without len() are accepted as well.
    folds = list(cv_inds)
    train_errors = np.zeros(len(folds))
    test_errors = np.zeros(len(folds))
    for i, (train, test) in enumerate(folds):
        train_fset = fset.isel(name=train)
        test_fset = fset.isel(name=test)
        build_model_from_featureset(train_fset, model)
        # NOTE(review): the recorded run of this notebook failed with
        # "model_predictions() got an unexpected keyword argument
        # 'return_probs'"; confirm this keyword against the installed
        # mltsp version.
        train_predictions = model_predictions(train_fset, model,
                                              return_probs=False)
        test_predictions = model_predictions(test_fset, model,
                                             return_probs=False)
        train_errors[i] = np.mean(train_predictions != classes[train])
        test_errors[i] = np.mean(test_predictions != classes[test])
    return train_errors, test_errors


# One classifier per feature set. Every stochastic component is seeded so
# that re-running the notebook reproduces the reported errors; the MLP was
# previously the only one without a fixed random_state.
model_mltsp = RandomForestClassifier(n_estimators=1000, max_features='auto', random_state=0)
model_guo = KNeighborsClassifier(n_neighbors=3)
model_ocak = MLPClassifier(hidden_layer_sizes=(5,), max_iter=int(1e4), activation='tanh',
                           alpha=1e-5, random_state=0)

# The same two stratified folds are reused for all three models.
cv = StratifiedKFold(classes, n_folds=2, random_state=0)
mltsp_train_errors, mltsp_test_errors = cv_train_test_errors(model_mltsp, fset_mltsp, classes, cv)
guo_train_errors, guo_test_errors = cv_train_test_errors(model_guo, fset_guo, classes, cv)
ocak_train_errors, ocak_test_errors = cv_train_test_errors(model_ocak, fset_ocak, classes, cv)

# Report the fold-averaged errors with one shared template.
report_template = "{}: average training error={:.2%}, average test error={:.2%}"
for label, train_errors, test_errors in [
        ("Built-in MLTSP features", mltsp_train_errors, mltsp_test_errors),
        ("Guo et al. features", guo_train_errors, guo_test_errors),
        ("App. entropy features", ocak_train_errors, ocak_test_errors)]:
    print(report_template.format(label, np.mean(train_errors), np.mean(test_errors)))

TypeError: model_predictions() got an unexpected keyword argument 'return_probs'