In [1]:
# import library 
import sktime 
import numpy as np
from sktime.utils.load_data import load_from_arff_to_dataframe

In [2]:
# Retrieve Xtrain and ytrain from the training .arff file.
# This assumes that you unzipped the archive in the same folder as this .ipynb file;
# in the case below, this .ipynb and the AtrialFibrillation folder are both stored
# in the folder "examples".
# The load function returns two things:
#     1) a pandas DataFrame storing the data
#     2) a NumPy array of the corresponding labels (these could be the categories
#        that you would like to classify the series into)

Xtrain, ytrain = load_from_arff_to_dataframe("../examples/AtrialFibrillation/AtrialFibrillation_TRAIN.arff")

In [3]:
# View the first five rows of the training DataFrame.
# As the output shows, each cell (dim_0, dim_1) holds an entire indexed series
# rather than a single number.
Xtrain.head()

Unnamed: 0,dim_0,dim_1
0,0 -0.34086 1 -0.38038 2 -0.34580 3...,0 0.14820 1 0.13338 2 0.10868 3...
1,0 -0.11362 1 -0.07410 2 -0.05928 3...,0 -0.00988 1 -0.02470 2 -0.00494 3...
2,0 -0.2079 1 -0.1683 2 -0.1980 3 ...,0 -0.02632 1 -0.04606 2 -0.08554 3...
3,0 -0.11805 1 -0.08657 2 -0.09444 3...,0 0.03510 1 0.04680 2 0.06435 3...
4,0 -0.11362 1 -0.06422 2 -0.05928 3...,0 -0.04940 1 0.01482 2 0.03952 3...


In [4]:
# Get a brief summary of the data (count / unique / top / freq for each dimension).
Xtrain.describe()

Unnamed: 0,dim_0,dim_1
count,15,15
unique,15,15
top,0 0.07686 1 0.06588 2 0.02196 3...,0 0.13090 1 0.13090 2 0.09520 3...
freq,1,1


In [5]:
# np.unique lets you see the distinct class labels present in the label array.
np.unique(ytrain)

array(['n', 's', 't'], dtype='<U1')

In [6]:
# You can also inspect the shape of the label array.
# Note that the number of labels corresponds to the number of rows in the DataFrame.
ytrain.shape

(15,)

In [7]:
# Load the held-out test split the same way as the training split;
# it is used below to score each fitted classifier.
Xtest,ytest = load_from_arff_to_dataframe("../examples/AtrialFibrillation/AtrialFibrillation_TEST.arff")

In [8]:
# The first method we can use is the concatenation method:
# it concatenates the dimensions of the multivariate series into a single
# univariate series, which is then handed to a univariate classifier.

from sktime.transformers.compose import ColumnConcatenator
from sktime.classifiers.compose import TimeSeriesForestClassifier
from sktime.pipeline import Pipeline

# The forest is a randomized ensemble: pin its seed so that the score is the
# same on every "Restart Kernel -> Run All" instead of drifting between runs.
# (A score displayed from an earlier, unseeded run may differ until re-executed.)
steps_concat = [
    ('concatenate', ColumnConcatenator()),
    ('classify', TimeSeriesForestClassifier(n_estimators=100, random_state=42))]
clf_concat = Pipeline(steps_concat)
clf_concat.fit(Xtrain, ytrain)
# Last expression of the cell, so the test-split score is displayed.
clf_concat.score(Xtest, ytest)

0.4

In [9]:
from sktime.classifiers.distance_based import KNeighborsTimeSeriesClassifier

# Same concatenate-then-classify recipe, this time with a nearest-neighbour
# classifier using the "dtw" metric as the final step.
# Note: this rebinds steps_concat and clf_concat from the previous cell.
steps_concat = [
    ("concatenate", ColumnConcatenator()),
    ("classify", KNeighborsTimeSeriesClassifier(metric="dtw")),
]
clf_concat = Pipeline(steps_concat)
clf_concat.fit(Xtrain, ytrain)
# Last expression of the cell, so the test-split score is displayed.
clf_concat.score(Xtest, ytest)


0.3333333333333333

In [47]:
from sktime.classifiers.shapelet_based import ShapeletTransformClassifier

# Another approach: fit a shapelet-transform classifier directly on the
# training data (no column-concatenation step in this cell).
# NOTE(review): time_contract_in_mins=0.7 looks like a time budget (in minutes)
# for the fit — confirm against the sktime docs; if so, the score may depend on
# how much work the machine completes within that budget.
clf_shapelet = ShapeletTransformClassifier(time_contract_in_mins=0.7)
clf_shapelet.fit(Xtrain, ytrain)
# Last expression of the cell, so the test-split score is displayed.
clf_shapelet.score(Xtest, ytest)

0.2

In [41]:
# Re-display the first five rows of the training DataFrame
# (same call as near the top of the notebook).
Xtrain.head()

Unnamed: 0,dim_0,dim_1
0,0 -0.34086 1 -0.38038 2 -0.34580 3...,0 0.14820 1 0.13338 2 0.10868 3...
1,0 -0.11362 1 -0.07410 2 -0.05928 3...,0 -0.00988 1 -0.02470 2 -0.00494 3...
2,0 -0.2079 1 -0.1683 2 -0.1980 3 ...,0 -0.02632 1 -0.04606 2 -0.08554 3...
3,0 -0.11805 1 -0.08657 2 -0.09444 3...,0 0.03510 1 0.04680 2 0.06435 3...
4,0 -0.11362 1 -0.06422 2 -0.05928 3...,0 -0.04940 1 0.01482 2 0.03952 3...


In [43]:
# Display the training labels in full (only 15 entries, so printing them all is fine).
ytrain

array(['n', 'n', 'n', 'n', 'n', 's', 's', 's', 's', 's', 't', 't', 't',
       't', 't'], dtype='<U1')