# Try Cesium

* Feature list: http://cesium-ml.org/docs/feature_table.html
* Example: https://github.com/cesium-ml/cesium/blob/master/examples/plot_EEG_Example.py
  * It has built-in model prediction for its special input data structure
  
### Summary
* It's easy to add customized ts features with Cesium
* To use an expanding or rolling window, the input must follow a special data structure in which each row holds one window of data as a list
* Compared with tsfresh, it doesn't offer much flexibility to modify feature extraction settings or parameters

In [2]:
from cesium import featurize
import pandas as pd

In [3]:
# Mock up a small two-group time series: group "a" has 7 time steps and
# group "b" has 5; x walks the odd numbers 1..23 and y the even numbers 2..24.
df = pd.DataFrame({
    "group": ["a"] * 7 + ["b"] * 5,
    "time": [*range(1, 8), *range(1, 6)],
    "x": list(range(1, 24, 2)),
    "y": list(range(2, 25, 2)),
})

df

Unnamed: 0,group,time,x,y
0,a,1,1,2
1,a,2,3,4
2,a,3,5,6
3,a,4,7,8
4,a,5,9,10
5,a,6,11,12
6,a,7,13,14
7,b,1,15,16
8,b,2,17,18
9,b,3,19,20


In [10]:
from cesium import datasets
import numpy as np

eeg = datasets.fetch_andrzejak()

# Merge the raw class codes into three labels:
#   (Z, O) -> Normal, (N, F) -> Interictal, (S) -> Ictal.
# The labels are first converted to a 16-character string dtype so the
# longer class names fit.
labels = eeg["classes"].astype("U16")
labels[np.isin(labels, ["Z", "O"])] = "Normal"
labels[np.isin(labels, ["N", "F"])] = "Interictal"
labels[labels == "S"] = "Ictal"
eeg["classes"] = labels

Loaded data from cached archive.


In [12]:
# Peek at the first five rows of the fetched dataset in tabular form.
pd.DataFrame(eeg).head()

Unnamed: 0,times,measurements,classes,archive,header
0,"[0.0, 0.00576171875, 0.0115234375, 0.017285156...","[40.0, 48.0, 35.0, 5.0, -40.0, -54.0, -32.0, 6...",Normal,C:\Users\wuhan/.local/datasets/andrzejak\andrz...,C:\Users\wuhan/.local/datasets/andrzejak\andrz...
1,"[0.0, 0.00576171875, 0.0115234375, 0.017285156...","[-56.0, -50.0, -64.0, -91.0, -135.0, -140.0, -...",Normal,C:\Users\wuhan/.local/datasets/andrzejak\andrz...,C:\Users\wuhan/.local/datasets/andrzejak\andrz...
2,"[0.0, 0.00576171875, 0.0115234375, 0.017285156...","[-37.0, -22.0, -17.0, -24.0, -31.0, -20.0, -5....",Normal,C:\Users\wuhan/.local/datasets/andrzejak\andrz...,C:\Users\wuhan/.local/datasets/andrzejak\andrz...
3,"[0.0, 0.00576171875, 0.0115234375, 0.017285156...","[-31.0, -43.0, -39.0, -39.0, -9.0, -5.0, 18.0,...",Normal,C:\Users\wuhan/.local/datasets/andrzejak\andrz...,C:\Users\wuhan/.local/datasets/andrzejak\andrz...
4,"[0.0, 0.00576171875, 0.0115234375, 0.017285156...","[14.0, 26.0, 32.0, 25.0, 16.0, 8.0, 8.0, 12.0,...",Normal,C:\Users\wuhan/.local/datasets/andrzejak\andrz...,C:\Users\wuhan/.local/datasets/andrzejak\andrz...


### Generate selected features

In [15]:
# Names of the features to compute for every series; no custom functions are
# supplied here, so these are resolved by cesium itself.
# NOTE(review): the recorded output shows near-zero values for
# "flux_percentile_ratio_mid20" together with a runtime warning fragment —
# confirm that this feature is meaningful for this kind of signal.
features_to_use = [
    "amplitude",
    "percent_beyond_1_std",
    "flux_percentile_ratio_mid20",
    "maximum",
    "max_slope",
    "median",
    "median_absolute_deviation",
    "percent_close_to_median",
    "minimum",
    "skew",
    "std",
    "weighted_average",
]

# Featurize all series in one call; no measurement errors are provided.
fset_cesium = featurize.featurize_time_series(
    times=eeg["times"],
    values=eeg["measurements"],
    errors=None,
    features_to_use=features_to_use,
)

fset_cesium

  linear_scale_data = base ** (exponent * x)


feature,amplitude,percent_beyond_1_std,flux_percentile_ratio_mid20,maximum,max_slope,median,median_absolute_deviation,percent_close_to_median,minimum,skew,std,weighted_average
channel,0,0,0,0,0,0,0,0,0,0,0,0
0,143.5,0.327313,6.309573e-22,141.0,11107.796610,-4.0,28.0,0.505004,-146.0,0.032805,40.411000,-4.132048
1,211.5,0.290212,3.981072e-29,169.0,20653.559322,-51.0,32.0,0.640469,-254.0,-0.092715,48.812668,-52.444716
2,165.0,0.302660,1.000000e-26,184.0,13537.627119,13.0,31.0,0.515987,-146.0,-0.004100,47.144789,12.705150
3,171.5,0.300952,1.928693e-26,162.0,17008.813559,-4.0,31.0,0.541128,-181.0,0.063678,47.072316,-3.992433
4,170.0,0.305101,3.981072e-25,152.0,13016.949153,-18.0,29.0,0.566268,-188.0,0.142753,44.910958,-17.999268
...,...,...,...,...,...,...,...,...,...,...,...,...
495,876.5,0.368318,6.309573e-216,727.0,94242.711864,83.0,246.0,0.364413,-1026.0,-0.472757,332.455418,12.870393
496,433.0,0.361728,1.000000e-88,467.0,27595.932203,10.0,113.0,0.394923,-399.0,0.046880,159.833074,7.087137
497,1359.0,0.270442,1.203319e-287,1435.0,243156.610169,83.0,196.0,0.628265,-1283.0,-0.468130,374.077172,30.493532
498,1590.0,0.305589,0.000000e+00,1364.0,176856.949153,116.0,324.0,0.489138,-1816.0,-0.674034,505.060930,37.571882


In [23]:
# Cross-check the "median" feature by computing it directly with NumPy
# for the first three series.
print(*(np.median(eeg['measurements'][idx]) for idx in range(3)))

-4.0 -51.0 13.0


### Generate customized features

In [24]:
import scipy.stats

def hanhan_mean_signal2(t, m, e):
    """Return twice the arithmetic mean of the measurements ``m``.

    ``t`` and ``e`` are accepted but not used; the three-argument signature
    is what the custom-feature call in this notebook expects (presumably
    times, measurements and errors — confirm against the cesium docs).
    """
    doubled_mean = 2 * np.mean(m)
    return doubled_mean

def hanhan_std_signal2(t, m, e):
    """Return twice the standard deviation of the measurements ``m``.

    Uses ``np.std`` with its default arguments. ``t`` and ``e`` are accepted
    but not used; the three-argument signature is what the custom-feature
    call in this notebook expects (presumably times, measurements and
    errors — confirm against the cesium docs).
    """
    doubled_std = 2 * np.std(m)
    return doubled_std

In [27]:
# Map each custom feature name (these become the output columns) to the
# function that computes it.
guo_features = {"mean2": hanhan_mean_signal2, "std2": hanhan_std_signal2}

# Featurize with only the custom features: the dict's keys select what to
# compute and the dict itself supplies the implementations.
fset_guo = featurize.featurize_time_series(
    times=eeg["times"],
    values=eeg["measurements"],
    errors=None,
    features_to_use=list(guo_features),
    custom_functions=guo_features,
)
fset_guo.head()

feature,mean2,std2
channel,0,0
0,-8.264096,80.822001
1,-104.889431,97.625335
2,25.4103,94.289578
3,-7.984867,94.144633
4,-35.998536,89.821916
