In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import warnings
warnings.filterwarnings('ignore') 

from tensorflow import keras
from sklearn.preprocessing import RobustScaler, Normalizer, StandardScaler
from sklearn.model_selection import train_test_split
from datasets import load_data, random_benchmark, list_datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn import manifold
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, accuracy_score
from Imputation import remove_and_impute
from Models import SAE, CNN_AE, LSTM_AE, GRU_AE, Bi_LSTM_AE, CNN_Bi_LSTM_AE, Causal_CNN_AE, Wavenet

from matplotlib import pyplot as plt
import seaborn as sns
# Global plotting defaults: 11.7 x 8.27 inch figures rendered at 600 dpi,
# drawn with seaborn's "white" style.
sns.set(rc={"figure.dpi": 600, "figure.figsize": (11.7, 8.27)})
sns.set_style("white")

# Seed both NumPy and TensorFlow with the same fixed value.
tf.random.set_seed(7)
np.random.seed(7)

In [2]:
# Enumerate GPUs in PCI bus order and expose only device "0" to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to allocate at most 4096 MB (4 GB) of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Classifier instances; every estimator that accepts a seed is fixed to 7.
rf_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=7,
    n_jobs=-1,
)
svm_clf = SVC(gamma='scale', random_state=7)
# With a single neighbour the 'distance' weighting cannot change the vote.
knn_clf = KNeighborsClassifier(
    n_neighbors=1,
    weights='distance',
    n_jobs=-1,
)
mlp_clf = MLPClassifier(random_state=7)

In [4]:
from TRepNet import TRepNet
# Early-stopping callback: patience of 5 epochs on the callback's default
# monitored quantity, restoring the best weights seen when training stops.
es = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
)

In [5]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def _pad_rows(frame, maxlen):
    """Pad or truncate every series of every row in ``frame`` to ``maxlen`` values.

    Returns a float32 array of shape ``(n_rows, n_columns, maxlen)``.
    """
    return np.array([
        pad_sequences([list(series) for series in row], maxlen=maxlen, dtype='float32')
        for _, row in frame.iterrows()
    ])


def flatten_ts(train, test):
    """Flatten nested time-series frames into robust-scaled 2-D arrays.

    Every cell of ``train`` / ``test`` is expected to hold one sequence
    (anything supporting ``len()`` and iteration). All sequences are brought
    to a common length with ``pad_sequences`` using its default padding and
    truncation modes, then each row is flattened column by column: all
    ``maxlen`` values of the first column, then all values of the second
    column, and so on.

    Note that the common length is the *mean* training-series length rounded
    up, not the maximum, so series longer than the mean are truncated.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames with the same number of columns, one sequence per cell.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled, width)`` where both arrays have
        ``width = maxlen * n_columns`` columns and were transformed by a
        ``RobustScaler`` fitted on the training data only.

    Raises
    ------
    ValueError
        If ``train`` contains no series, or if ``test`` has a different
        number of columns than ``train``.
    """
    n_cols = train.columns.shape[0]
    if test.columns.shape[0] != n_cols:
        raise ValueError(
            "train and test must have the same number of columns "
            "(got %d and %d)" % (n_cols, test.columns.shape[0])
        )

    train_lens = [len(series) for _, row in train.iterrows() for series in row]
    if not train_lens:
        raise ValueError("flatten_ts needs at least one training series")

    # Common length = ceil(mean training length); computed from train only so
    # that the test set never influences the preprocessing.
    maxlen = int(np.ceil(np.average(train_lens)))
    width = maxlen * n_cols

    train_flat = _pad_rows(train, maxlen).reshape(train.shape[0], width)
    test_flat = _pad_rows(test, maxlen).reshape(test.shape[0], width)

    scaler = RobustScaler().fit(train_flat)
    return scaler.transform(train_flat), scaler.transform(test_flat), width

def rnn_reshape(train, test, n_steps, n_features):
    """Reshape two 2-D arrays into ``(samples, n_steps, n_features)`` tensors.

    The number of samples is taken from each array's first dimension; the
    remaining values are laid out with a plain row-major ``reshape``.

    NOTE(review): a plain reshape treats the flat axis as time-major (all
    features of step 0, then step 1, ...). If the arrays were flattened
    feature-major instead, steps and features end up interleaved -- confirm
    against whatever produced the inputs.
    """
    def _as_sequence_batch(flat):
        n_samples = flat.shape[0]
        return flat.reshape(n_samples, n_steps, n_features)

    return _as_sequence_batch(train), _as_sequence_batch(test)

In [22]:
# --- Experiment configuration ---
data_name = 'Cricket'
univariate = False
train_x, train_y, test_x, test_y = load_data(data_name, univariate=univariate)
n_features = train_x.columns.shape[0]

# flatten_ts returns the flattened width (steps * features) as its third value,
# so the per-feature sequence length is that width divided by n_features.
X_train, X_test, n_steps = flatten_ts(train_x, test_x)
seq_len = n_steps // n_features
X_train, X_test = rnn_reshape(X_train, X_test, seq_len, n_features)

# --- Autoencoder: encoder and decoder chained into one reconstruction model ---
encoder, decoder = TRepNet(seq_len, n_features, activation='elu')
model = keras.models.Sequential([encoder, decoder])

# `learning_rate` replaces the deprecated `lr` alias.
model.compile(loss="mae", optimizer=keras.optimizers.Nadam(learning_rate=0.001, clipnorm=1.), metrics=['mae'])

# validation_data is passed as the (inputs, targets) tuple that Keras documents.
# NOTE(review): the test split doubles as the validation set that drives early
# stopping, so test data influences model selection -- consider holding out part
# of the training data instead.
# NOTE(review): the recorded run of this cell stopped with a ResourceExhaustedError
# while allocating a [24672, 7182] float tensor inside a Nadam update; that looks
# like a weight-sized tensor, so lowering batch_size alone presumably will not
# help -- TODO confirm.
history = model.fit(
    X_train, X_train,
    epochs=500,
    batch_size=16,
    validation_data=(X_test, X_test),
    callbacks=[es],
    verbose=0,
    shuffle=False,
)

# Codings
codings_train = encoder.predict(X_train)
codings_test = encoder.predict(X_test)

# Labels are converted to float first and then to int.
train_y = (train_y.astype(float)).astype(int)
test_y = (test_y.astype(float)).astype(int)

# 2-D t-SNE embedding of the test codings, coloured by class label.
tsne = manifold.TSNE(perplexity=30, random_state=7)
X_embedded = tsne.fit_transform(codings_test)

# x/y are passed by keyword: newer seaborn releases reject positional data vectors.
plot = sns.scatterplot(x=X_embedded[:, 0], y=X_embedded[:, 1], hue=test_y)
fig = plot.get_figure()

# Make sure the output directory exists before saving the figure.
os.makedirs('./tSNE', exist_ok=True)
fig.savefig('./tSNE/'+data_name+"-test.png")



ResourceExhaustedError:  OOM when allocating tensor with shape[24672,7182] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node Nadam/Nadam/update_232/mul_2 (defined at /home/patara/anaconda3/envs/patara/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_distributed_function_1041617]

Function call stack:
distributed_function


In [None]:
# for name in ['SyntheticControl', 'Chinatown', 'DiatomSizeReduction', 'GunPointOldVersusYoung', 'InsectEPGRegularTrain', 'PowerCons', 'UWaveGestureLibraryAll']:
#     print(name)
#     run_tSNE(name)

In [None]:
# for name in ['ArticularyWordRecognition', 'ERing', 'Handwriting']:
#     print(name)
#     run_tSNE(name, False)