In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import warnings
warnings.filterwarnings('ignore') 

from tensorflow import keras
from sklearn.preprocessing import RobustScaler, Normalizer, StandardScaler
from sklearn.model_selection import train_test_split
from datasets import load_data, random_benchmark, list_datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, accuracy_score
from Imputation import remove_and_impute
from Models import SAE, CNN_AE, LSTM_AE, GRU_AE, Bi_LSTM_AE, CNN_Bi_LSTM_AE, Causal_CNN_AE, Wavenet

np.random.seed(7)
tf.random.set_seed(7)

In [2]:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [3]:
rf_clf = RandomForestClassifier(n_jobs=-1, n_estimators=100, random_state=7)
svm_clf = SVC(random_state=7, gamma='scale')
knn_clf = KNeighborsClassifier(n_neighbors=1, weights='distance', n_jobs=-1)
mlp_clf = MLPClassifier(random_state=7)

In [4]:
from TRepNet import TRepNet

In [5]:
es = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
# mc = keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True)

In [6]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def flatten_ts(train, test):
    new_train, new_test = [], []
    train_lens = []
    
    for _, row in train.iterrows():
        for i in row.index:
            train_lens.append(len(row[i]))

    maxlen = np.ceil(np.average(train_lens)).astype(int)
    
    for _, row in train.iterrows():
        new_list = []
        for i in row.index:
            ts = []
            for j in range(len(row[i])):
                ts.append(row[i][j])
            new_list.append(ts)
        new_train.append(pad_sequences(new_list, maxlen=maxlen, dtype='float32'))
        
    for _, row in test.iterrows():
        new_list = []
        for i in row.index:
            ts = []
            for j in range(len(row[i])):
                ts.append(row[i][j])
            new_list.append(ts)
        new_test.append(pad_sequences(new_list, maxlen=maxlen, dtype='float32'))
            
    train_df = pd.DataFrame(np.array(new_train).reshape(train.shape[0], maxlen * train.columns.shape[0]))
    test_df = pd.DataFrame(np.array(new_test).reshape(test.shape[0], maxlen * train.columns.shape[0]))

    scaler = RobustScaler()
    scaler.fit(train_df)
    return scaler.transform(train_df), scaler.transform(test_df), maxlen * train.columns.shape[0]
#     return np.array(train_df), np.array(test_df), maxlen * train.columns.shape[0]

def rnn_reshape(train, test, n_steps, n_features):
#     train, test = flatten_ts(train, test)
    return train.reshape(train.shape[0], n_steps, n_features), test.reshape(test.shape[0], n_steps, n_features)

In [7]:
# when tuning start with learning rate->mini_batch_size -> 
# momentum-> #hidden_units -> # learning_rate_decay -> #layers 

from tensorflow.keras.utils import plot_model
from sklearn.model_selection import GridSearchCV

def evaluate(fn, data_name, univariate):
    print('Data: ', data_name)
    train_x, train_y, test_x, test_y = load_data(data_name, univariate=univariate)    
#     n_steps = train_x.iloc[0][0].shape[0]
    n_features = train_x.columns.shape[0]
        
    X_train, X_test, n_steps = flatten_ts(train_x, test_x)
    X_train, X_test = rnn_reshape(X_train, X_test, n_steps // n_features, n_features)
            
    encoder, decoder = fn(n_steps // n_features, n_features, activation='elu')
    model = keras.models.Sequential([encoder, decoder])

    model.compile(loss="mae", optimizer=keras.optimizers.Nadam(lr=0.001, clipnorm=1.), metrics=['mae'])
    history = model.fit(X_train, X_train, epochs=500, batch_size=16, validation_data=[X_test, X_test], callbacks=[es], verbose=0, shuffle=False)
    
    # Codings
    codings_train = encoder.predict(X_train)
    codings_test = encoder.predict(X_test)
    
    # RF
    rf_clf.fit(codings_train, train_y)
    pred = rf_clf.predict(codings_test)
    rf_scores = {'accuracy': accuracy_score(test_y, pred), 'f1': f1_score(test_y, pred, average='weighted')}
    print('RF >>', rf_scores)

    # SVM
    svm_clf.fit(codings_train, train_y)
    pred = svm_clf.predict(codings_test)
    svm_scores = {'accuracy': accuracy_score(test_y, pred), 'f1': f1_score(test_y, pred, average='weighted')}
    print('SVM >>', svm_scores)

    # 1-NN
    knn_clf.fit(codings_train, train_y)
    pred = knn_clf.predict(codings_test)
    knn_scores = {'accuracy': accuracy_score(test_y, pred), 'f1': f1_score(test_y, pred, average='weighted')}
    print('1-NN >>', knn_scores)

    # MLP
    mlp_clf.fit(codings_train, train_y)
    pred = mlp_clf.predict(codings_test)
    mlp_scores = {'accuracy': accuracy_score(test_y, pred), 'f1': f1_score(test_y, pred, average='weighted')}
    print('MLP >>', mlp_scores)
    
    results.append({'dataset': data_name, 'dim': codings_train.shape[1], 
                    'RF-ACC': rf_scores['accuracy'], 
                    'SVM-ACC': svm_scores['accuracy'],
                    '1NN-ACC': knn_scores['accuracy'], 
                    'MLP-ACC': mlp_scores['accuracy'], 
                    'RF-F1': rf_scores['f1'], 
                    'SVM-F1': svm_scores['f1'],
                    '1NN-F1': knn_scores['f1'], 
                    'MLP-F1': mlp_scores['f1']
                    })

In [8]:
selected_mul_datasets = ['ArticularyWordRecognition', 'AtrialFibrillation', 'BasicMotions', 'Cricket',
                         'ERing', 'HandMovementDirection', 'Handwriting', 'JapaneseVowels', 'PenDigits', 'RacketSports', 'SelfRegulationSCP1',
                         'SelfRegulationSCP2', 'SpokenArabicDigits', 'StandWalkJump', 'EthanolConcentration']

for fn in [TRepNet]:
    results = []
    for dataset in ['ArticularyWordRecognition', 'AtrialFibrillation', 'BasicMotions', 'ERing', 'Handwriting']:
        evaluate(fn, dataset, univariate=False)
    pd.DataFrame(results).to_csv('./results/mul-'+ fn.__name__ +'-all-results.csv', index=False)
print('END')

Data:  ArticularyWordRecognition
RF >> {'accuracy': 0.9666666666666667, 'f1': 0.9661706235793192}
SVM >> {'accuracy': 0.98, 'f1': 0.9798129117259553}
1-NN >> {'accuracy': 0.9733333333333334, 'f1': 0.9735091787439615}
MLP >> {'accuracy': 0.9733333333333334, 'f1': 0.9731185973446842}
Data:  AtrialFibrillation
RF >> {'accuracy': 0.26666666666666666, 'f1': 0.24444444444444446}
SVM >> {'accuracy': 0.26666666666666666, 'f1': 0.23589743589743595}
1-NN >> {'accuracy': 0.4666666666666667, 'f1': 0.4594017094017094}
MLP >> {'accuracy': 0.3333333333333333, 'f1': 0.3277777777777778}
Data:  BasicMotions
RF >> {'accuracy': 0.95, 'f1': 0.949937343358396}
SVM >> {'accuracy': 1.0, 'f1': 1.0}
1-NN >> {'accuracy': 0.9, 'f1': 0.8958333333333333}
MLP >> {'accuracy': 1.0, 'f1': 1.0}
Data:  ERing
RF >> {'accuracy': 0.9148148148148149, 'f1': 0.9143588428729911}
SVM >> {'accuracy': 0.9148148148148149, 'f1': 0.9135668954392593}
1-NN >> {'accuracy': 0.8962962962962963, 'f1': 0.894732835677418}
MLP >> {'accuracy':