In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import statistics
import time
import glob
import os
import feather
from sktime.datasets import load_from_tsfile_to_dataframe
# Set seaborn's plotting style to "whitegrid".
sns.set(style="whitegrid")

In [None]:
from featureExtraction import *

--------------------------------------

### LOAD DATA

In [None]:
# Names of the datasets processed by every feature-extraction cell below.
# Each name is combined with 'data/' and a '_TRAIN.ts' / '_TEST.ts' suffix
# to build the file paths that get loaded.
datasets = [
    'Adiac',
    'Fish',
    'OliveOil',
    'Phoneme',
    'ShapesAll',
    'SwedishLeaf',
    'WordSynonyms',
]

Compute for CATCH22:

In [None]:
# Build one Catch22 feature table covering the train and test split of every
# dataset in `datasets`, filter out sparse columns / incomplete rows, and
# write the result to a feather file.
frames = []

for dataset in datasets:
    print(dataset)

    for split, suffix in (('train', '_TRAIN.ts'), ('test', '_TEST.ts')):
        x, y = load_from_tsfile_to_dataframe('data/' + dataset + suffix)

        frame = Catch22.transform(x)
        # Tag every row with its origin so the combined table can be split again.
        frame['data'] = dataset
        frame['set'] = split
        frame['label'] = [int(i) for i in y]
        frames.append(frame)

# Concatenate once, rather than growing a frame inside the loop starting
# from an empty DataFrame.
df = pd.concat(frames, ignore_index=True)

# remove features with more than 10% NaN
nan_fraction = df.isnull().sum() / len(df)
output_catch22 = df.drop(columns=nan_fraction[nan_fraction > 0.1].index)

# drop time series with nan values
output_catch22 = output_catch22.dropna(axis=0)

# dropna can leave gaps in the row labels, and DataFrame.to_feather has
# historically refused any index other than the default RangeIndex, so the
# index is reset for the write. The in-memory frame keeps its row labels.
output_catch22.reset_index(drop=True).to_feather('featureFrames/output_catch22.feather')

Compute for KATS:

In [None]:
# Build one Kats feature table covering the train and test split of every
# dataset in `datasets`, filter out sparse columns / incomplete rows, and
# write the result to a feather file.
frames = []

for dataset in datasets:
    print(dataset)

    for split, suffix in (('train', '_TRAIN.ts'), ('test', '_TEST.ts')):
        x, y = load_from_tsfile_to_dataframe('data/' + dataset + suffix)

        frame = Kats.transform(x)
        # Tag every row with its origin so the combined table can be split again.
        frame['data'] = dataset
        frame['set'] = split
        frame['label'] = [int(i) for i in y]
        frames.append(frame)

# Concatenate once, rather than growing a frame inside the loop starting
# from an empty DataFrame.
df = pd.concat(frames, ignore_index=True)

# remove features with more than 10% NaN
nan_fraction = df.isnull().sum() / len(df)
output_kats = df.drop(columns=nan_fraction[nan_fraction > 0.1].index)

# drop time series with nan values
output_kats = output_kats.dropna(axis=0)

# dropna can leave gaps in the row labels, and DataFrame.to_feather has
# historically refused any index other than the default RangeIndex, so the
# index is reset for the write. The in-memory frame keeps its row labels.
output_kats.reset_index(drop=True).to_feather('featureFrames/output_kats.feather')

Compute for TSFEATURES:

In [None]:
# Build one TSFeatures feature table covering the train and test split of
# every dataset in `datasets`, filter out sparse columns / incomplete rows,
# and write the result to a feather file.
frames = []

for dataset in datasets:
    print(dataset)

    for split, suffix in (('train', '_TRAIN.ts'), ('test', '_TEST.ts')):
        x, y = load_from_tsfile_to_dataframe('data/' + dataset + suffix)

        frame = TSFeatures.transform(x)
        # Tag every row with its origin so the combined table can be split again.
        frame['data'] = dataset
        frame['set'] = split
        frame['label'] = [int(i) for i in y]
        frames.append(frame)

# Concatenate once, rather than growing a frame inside the loop starting
# from an empty DataFrame.
df = pd.concat(frames, ignore_index=True)

# remove features with more than 10% NaN
nan_fraction = df.isnull().sum() / len(df)
output_tsfeature = df.drop(columns=nan_fraction[nan_fraction > 0.1].index)

# drop time series with nan values
output_tsfeature = output_tsfeature.dropna(axis=0)

# Renumber the rows 0..n-1 and discard the old labels, so the frame has the
# default index when it is written to feather.
output_tsfeature = output_tsfeature.reset_index(drop=True)
output_tsfeature.to_feather('featureFrames/output_tsfeature.feather')

In [None]:
# Scratch export to a separate test file: after reset_index() puts the old
# row labels in column 0, iloc[:, 2:] skips that column and the first column
# of `output_tsfeature`.
# The reset frame is deliberately not assigned back to `output_tsfeature`:
# rebinding it made every re-run of this cell stack another index column
# onto the shared frame, which changed the columns written to the file.
# NOTE(review): skipping the first column of `output_tsfeature` looks
# unintentional — confirm before relying on test.feather.
output_tsfeature.reset_index().iloc[:, 2:].to_feather('featureFrames/test.feather')

Compute for TSFEL:

In [None]:
# Build one TSFel feature table covering the train and test split of every
# dataset in `datasets`, filter out sparse columns / incomplete rows, and
# write the result to a feather file.
frames = []

for dataset in datasets:
    print(dataset)

    for split, suffix in (('train', '_TRAIN.ts'), ('test', '_TEST.ts')):
        x, y = load_from_tsfile_to_dataframe('data/' + dataset + suffix)

        frame = TSFel.transform(x)
        # Tag every row with its origin so the combined table can be split again.
        frame['data'] = dataset
        frame['set'] = split
        frame['label'] = [int(i) for i in y]
        frames.append(frame)

# Concatenate once, rather than growing a frame inside the loop starting
# from an empty DataFrame.
df = pd.concat(frames, ignore_index=True)

# remove features with more than 10% NaN
nan_fraction = df.isnull().sum() / len(df)
output_tsfel = df.drop(columns=nan_fraction[nan_fraction > 0.1].index)

# drop time series with nan values
output_tsfel = output_tsfel.dropna(axis=0)

# dropna can leave gaps in the row labels, and DataFrame.to_feather has
# historically refused any index other than the default RangeIndex, so the
# index is reset for the write. The in-memory frame keeps its row labels.
output_tsfel.reset_index(drop=True).to_feather('featureFrames/output_tsfel.feather')

Compute for TSFRESH:

In [None]:
# Build one TSFresh feature table covering the train and test split of every
# dataset in `datasets`, filter out sparse columns / incomplete rows, and
# write the result to a feather file.
frames = []

for dataset in datasets:
    print(dataset)

    for split, suffix in (('train', '_TRAIN.ts'), ('test', '_TEST.ts')):
        x, y = load_from_tsfile_to_dataframe('data/' + dataset + suffix)

        frame = TSFresh.transform(x)
        # Tag every row with its origin so the combined table can be split again.
        frame['data'] = dataset
        frame['set'] = split
        frame['label'] = [int(i) for i in y]
        frames.append(frame)

# Concatenate once, rather than growing a frame inside the loop starting
# from an empty DataFrame.
df = pd.concat(frames, ignore_index=True)

# remove features with more than 10% NaN
nan_fraction = df.isnull().sum() / len(df)
output_tsfresh = df.drop(columns=nan_fraction[nan_fraction > 0.1].index)

# drop time series with nan values
output_tsfresh = output_tsfresh.dropna(axis=0)

# dropna can leave gaps in the row labels, and DataFrame.to_feather has
# historically refused any index other than the default RangeIndex, so the
# index is reset for the write. The in-memory frame keeps its row labels.
output_tsfresh.reset_index(drop=True).to_feather('featureFrames/output_tsfresh.feather')

In [None]:
# Turn the current row labels of `output_tsfresh` into a regular 'index'
# column (leaving a default 0..n-1 index behind), then overwrite the TSFresh
# feather file with that frame. The written file therefore carries the extra
# 'index' column alongside the features.
d = output_tsfresh.reset_index(drop=False)
d.to_feather(path='featureFrames/output_tsfresh.feather')