In [1]:
import time
import collections
import pickle
import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.cluster import KMeans
from sliding_window import sliding_window
from cluster_eval import greedy_relabel
from cluster_eval import brute_relabel
from cluster_eval import print_results
from cluster_eval import relabel_list
from keras import layers

# Number of sensor channels (feature columns per sample) in the OPPORTUNITY challenge data
NB_SENSOR_CHANNELS = 113

# Number of target classes.
# NOTE(review): the original comment called this the "gesture recognition problem", but the
# cells below load the locomotion file and keep four classes - confirm which task is meant.
NUM_CLASSES = 4

# Length, in samples, of each sliding window used to segment the data
SLIDING_WINDOW_LENGTH = 24

# Length of the input sequence after convolutional operations
FINAL_SEQUENCE_LENGTH = 6

# Step, in samples, between the starts of consecutive sliding windows
SLIDING_WINDOW_STEP = 12

# Mini-batch size used for training
BATCH_SIZE = 100

# Number of filters in each convolutional layer
NUM_FILTERS = 64

# Size of the filters (kernels) in the convolutional layers
FILTER_SIZE = 5

# Number of units in the long short-term memory (LSTM) recurrent layers
NUM_UNITS_LSTM = 128

In [2]:
def load_dataset(filename):
    """Load a pickled train/test split and cast it to compact dtypes.

    The pickle must hold an indexable object whose item 0 is
    ``(X_train, y_train)`` and whose item 1 is ``(X_test, y_test)``.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` with the inputs cast to
        ``float32`` and the targets cast to ``uint8``.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only open data files that come from a trusted source.
    # The context manager closes the file even if unpickling raises.
    with open(filename, 'rb') as f:
        data = pickle.load(f)

    X_train, y_train = data[0]
    X_test, y_test = data[1]

    print(" ..from file {}".format(filename))
    print(" ..reading instances: train {0}, test {1}".format(X_train.shape, X_test.shape))

    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # The targets are cast to uint8 (unsigned), so label values must fit in 0..255.
    y_train = y_train.astype(np.uint8)
    y_test = y_test.astype(np.uint8)

    return X_train, y_train, X_test, y_test

# Load the pickled train/test split; the relative path is resolved against the working directory.
print("Loading data...")
X_train, y_train, X_test, y_test = load_dataset('oppChallenge_locomotion.data')

def remove_class(X_train, y_train, X_test, y_test, class_n):
    """Drop every sample labelled ``class_n`` and close the gap in the label numbering.

    Parameters
    ----------
    X_train, X_test : numpy.ndarray
        Sample arrays, indexed along axis 0 in step with the label arrays.
    y_train, y_test : numpy.ndarray
        Integer label arrays.
    class_n : int
        Label of the class to remove.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` without the samples of
        ``class_n``; every label greater than ``class_n`` is decreased by one.
        The input arrays are left unmodified.
    """
    keep_train = y_train != class_n
    keep_test = y_test != class_n

    # Boolean-mask indexing returns new arrays, so the in-place shift below
    # cannot leak into the caller's data.
    uX_train, uy_train = X_train[keep_train], y_train[keep_train]
    uX_test, uy_test = X_test[keep_test], y_test[keep_test]

    # Shift every label above the removed class in one vectorised step. This does
    # not rely on the training labels being contiguous or covering every label
    # that occurs in the test set (the previous loop was bounded by
    # len(set(y_train)) and silently skipped labels at or above that count).
    uy_train[uy_train > class_n] -= 1
    uy_test[uy_test > class_n] -= 1

    return uX_train, uy_train, uX_test, uy_test

# def delete_features():
# Disabled experiments that drop sensor columns before training. Each group below builds
# `features_delete` (the column indices to remove); its heading gives the number of
# columns that remain out of 113.
# RKN^, RKN_, BACK, HIP, R-SHOE, L-SHOE
# 53 Features

#features_delete = np.arange(6, 15)
#features_delete = np.concatenate([features_delete, np.arange(21, 36)])
#features_delete = np.concatenate([features_delete, np.arange(45, 81)])

# 60 Features
#features_delete = np.arange(0, 6)
#features_delete = np.concatenate([features_delete, np.arange(15, 21)])
#features_delete = np.concatenate([features_delete, np.arange(36, 45)])
#features_delete = np.concatenate([features_delete, np.arange(81, 113)])

# 9 Features (Phone)
#features_delete = np.arange(0, 36)
#features_delete = np.concatenate([features_delete, np.arange(45, 113)])

#features_delete = np.concatenate([features_delete, np.arange(6, 113)])

# Apply the selection to both splits (axis 1 = feature columns).
#X_train = np.delete(X_train, features_delete, 1)
#X_test = np.delete(X_test, features_delete, 1)

# Display names of the classes, indexed by label value; kept in step with remove_class below.
classes = ["Null", "Stand", "Walk" ,"Sit", "Lie"]

# Label mapping as loaded:
# 0 - null | 1 - stand | 2 - walk | 3 - sit | 4 - lie

X_train, y_train, X_test, y_test = remove_class(X_train, y_train, X_test, y_test, 0) # remove null
classes.remove("Null")

# Label mapping after removing null:
# 0 - stand | 1 - walk | 2 - sit | 3 - lie

# Optional further removals (disabled). The indices refer to the mapping directly above;
# every removal renumbers the labels, so later indices must be re-checked if several are enabled.
#X_train, y_train, X_test, y_test = remove_class(X_train, y_train, X_test, y_test, 1) # remove walking
#classes.remove("Walk")

#X_train, y_train, X_test, y_test = remove_class(X_train, y_train, X_test, y_test, 3) # remove lying
#classes.remove("Lie")

#X_train, y_train, X_test, y_test = remove_class(X_train, y_train, X_test, y_test, 2) # remove sitting
#classes.remove("Sit")

# Mapping that would result if only the walking removal above were enabled:
# 0 - stand | 1 - sit | 2 - lie

# Sanity check: remaining sample counts and the number of training samples per label.
print(X_train.shape)
print(X_test.shape)

unique, counts = np.unique(y_train, return_counts=True)
label_dict = dict(zip(unique, counts))
print(label_dict)

Loading data...
 ..from file oppChallenge_locomotion.data
 ..reading instances: train (557963, 113), test (118750, 113)
(465668, 113)
(94260, 113)
{0: 231751, 1: 130506, 2: 88883, 3: 14528}


In [3]:
def opp_sliding_window(data_x, data_y, ws, ss):
    """Segment samples and labels into windows of length ``ws`` taken every ``ss`` steps.

    Each window is labelled with the label of its last sample.

    Returns the windowed inputs as float32 and one uint8 label per window.
    """
    n_channels = data_x.shape[1]
    windows = sliding_window(data_x, (ws, n_channels), (ss, 1))

    # Keep only the final label of every label window.
    last_labels = np.asarray([[segment[-1]] for segment in sliding_window(data_y, ws, ss)])
    flat_labels = last_labels.reshape(len(last_labels))

    return windows.astype(np.float32), flat_labels.astype(np.uint8)

# Sensor data is segmented using a sliding window mechanism: windows of SLIDING_WINDOW_LENGTH
# samples, advanced by SLIDING_WINDOW_STEP samples.
# NOTE: the four arrays are rebound to their windowed versions, so this cell is not idempotent -
# re-run the loading cell before running it again.

X_train, y_train = opp_sliding_window(X_train, y_train, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
X_test, y_test = opp_sliding_window(X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)

print(" ..after sliding window (testing): inputs {0}, targets {1}".format(X_test.shape, y_test.shape))

# Shapes of the windowed train and test inputs.
print(X_train.shape)
print(X_test.shape)

 ..after sliding window (testing): inputs (7854, 24, 113), targets (7854,)
(38804, 24, 113)
(7854, 24, 113)


In [4]:
from numpy.random import seed
import tensorflow as tf

In [32]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
# Author's note: an LSTM autoencoder doesn't need a sliding window; redundant data can lead to overfitting.
# NOTE(review): the arrays fed to this model are nevertheless the windowed ones.

# Seed the NumPy and TensorFlow random generators before the layers are created.
seed_n = 9
seed(seed_n)
tf.random.set_seed(seed_n)

# Take the input dimensions from the windowed data instead of hard-coding 113 features,
# so the model still matches X_train when sensor columns have been dropped.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential()
# encoder: compresses each window into a single 32-value vector
model.add(LSTM(64, activation='relu', input_shape=(n_timesteps, n_features), return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
#model.add(LSTM(16, activation='relu', return_sequences=False))
# repeat the encoded vector once per timestep so the decoder can unroll it
model.add(RepeatVector(n_timesteps))
# decoder: mirrors the encoder and reconstructs every timestep of the window
#model.add(LSTM(16, activation='relu', return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))
model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_16 (LSTM)               (None, 24, 64)            45568     
_________________________________________________________________
lstm_17 (LSTM)               (None, 32)                12416     
_________________________________________________________________
repeat_vector_4 (RepeatVecto (None, 24, 32)            0         
_________________________________________________________________
lstm_18 (LSTM)               (None, 24, 32)            8320      
_________________________________________________________________
lstm_19 (LSTM)               (None, 24, 64)            24832     
_________________________________________________________________
time_distributed_4 (TimeDist (None, 24, 113)           7345      
Total params: 98,481
Trainable params: 98,481
Non-trainable params: 0
__________________________________________________

In [33]:
from keras.callbacks import TensorBoard

# Train the autoencoder to reconstruct its own input (X_train is both input and target).
# The batch size comes from the BATCH_SIZE constant defined with the other hyper-parameters;
# 15% of the samples are held out for validation and TensorBoard logs go to /tmp/autoencoder.
model.fit(X_train, X_train, epochs=30, batch_size=BATCH_SIZE, validation_split=0.15, callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x212a53931f0>

In [None]:
# Persist the trained autoencoder as 'rae24_12_s0_d2' (relative to the working directory).
model.save('rae24_12_s0_d2')

In [14]:
# Load the previously saved autoencoder; this rebinds `model`, replacing any model built or trained above.
model = keras.models.load_model('rae24_12_s0_d2')

In [34]:
# Build the encoder: same inputs as the autoencoder, output taken from model.layers[1].
# In the architecture built in this notebook that is the second LSTM (32 units,
# return_sequences=False), i.e. one 32-value vector per window.
# NOTE(review): if `model` was loaded from disk, this assumes it has the same layer order.
encoder = keras.Model(inputs=model.inputs, outputs=model.layers[1].output)

In [35]:
# Run both splits through the encoder, then report the shape of each encoded array.
encoded_train = encoder.predict(X_train, verbose=0)
encoded_test = encoder.predict(X_test, verbose=0)
print(encoded_train.shape, encoded_test.shape, sep="\n")

(38804, 32)
(7854, 32)


In [36]:
from sklearn.cluster import KMeans
import operator

NUMBER_OF_CLUSTERS = 11

start = time.time()

# Fit the centroids on the encoded training windows, then assign a cluster to every
# encoded test window.
y_pred = (
    KMeans(n_clusters=NUMBER_OF_CLUSTERS, random_state=0)
    .fit(encoded_train)
    .predict(encoded_test)
)
# Disabled variant that weights the training samples:
#y_pred = KMeans(n_clusters=NUMBER_OF_CLUSTERS, random_state=0).fit(encoded_train, sample_weight=sample_weight_train).predict(encoded_test)

end = time.time()

# Wall-clock seconds spent fitting and predicting.
print("Time:", end - start, sep="\n")

Time:
2.1525440216064453


In [37]:
# Distinct cluster ids present in the predictions, in ascending order.
clusters = sorted(set(y_pred))

# Work on a copy so the raw cluster assignments in y_pred stay available.
uy_pred = y_pred.copy()

# Choose a class label for every cluster (helpers come from cluster_eval).
relabels = brute_relabel(uy_pred, y_test)
#relabels = greedy_relabel(uy_pred, y_test)
#relabels = [0, 0, 3, 0, 0, 0, 0, 0, 1, 2, 3]

# Translate cluster ids into class labels and report the scores.
uy_pred = relabel_list(uy_pred, clusters, relabels)
print_results(uy_pred, y_test, relabels)

Assigned Labels:
[0, 0, 2, 0, 3, 0, 1, 0, 0, 2, 0]

F1:
0.7350264174165717

Accuracy:
0.679144385026738

ARI:
0.4149103716516561


In [31]:
import itertools

#checking specific lists
# Scores every candidate cluster->class labeling held in `test` and keeps the best one.
# NOTE: `test` is built in the permutation cell further down the notebook; that cell must be
# run before this one, otherwise this cell fails with a NameError.

clusters = list(set(y_pred))
clusters.sort()

f1 = 0
label_list = None
for test_list in test:
    for candidate in test_list:
        candidate = list(candidate)
        # scikit-learn's signature is f1_score(y_true, y_pred). With average='weighted' the
        # per-class scores are weighted by the support of the first argument, so the ground
        # truth has to come first.
        # NOTE(review): print_results lives in cluster_eval; whether it uses the same
        # argument order is not visible here.
        score = metrics.f1_score(y_test, relabel_list(y_pred.copy(), clusters, candidate), average='weighted')
        # Accept the first candidate unconditionally so a result exists even when every
        # score is 0; afterwards only a strictly better score replaces it.
        if label_list is None or score > f1:
            f1 = score
            label_list = candidate

if label_list is None:
    raise ValueError("no candidate labelings to evaluate: `test` is empty")

uy_pred = y_pred.copy()

uy_pred = relabel_list(uy_pred, clusters, label_list)
print_results(uy_pred, y_test, label_list)

Assigned Labels:
[0, 0, 0, 2, 0, 0, 0, 0, 3, 1, 3]

F1:
0.759999914759273

Accuracy:
0.6633562515915458

ARI:
0.4301472472581655


In [38]:
# Number of distinct clusters that actually occur in the test predictions.
print(np.unique(y_pred).size)

11


In [23]:
import itertools

def distinct_permutations(labels):
    """Yield every distinct ordering of ``labels`` exactly once, as tuples.

    Orderings are produced in lexicographic order by repeatedly stepping to the
    next permutation in place. Unlike ``set(itertools.permutations(labels))``
    this never enumerates duplicate orderings, so the cost scales with the
    number of distinct orderings rather than with ``len(labels)!``.
    """
    current = sorted(labels)
    n = len(current)
    while True:
        yield tuple(current)
        # Find the rightmost position whose value is smaller than its successor.
        i = n - 2
        while i >= 0 and current[i] >= current[i + 1]:
            i -= 1
        if i < 0:
            return  # the sequence is non-increasing: this was the last ordering
        # Swap it with the rightmost larger value, then reverse the tail.
        j = n - 1
        while current[j] <= current[i]:
            j -= 1
        current[i], current[j] = current[j], current[i]
        current[i + 1:] = reversed(current[i + 1:])


# Candidate cluster->class labelings: every distinct reordering of each seed labeling
# (one entry per cluster). Each element of `test` is a list of tuples.
# The candidates are in lexicographic order; the set-based version produced the same
# candidates in an arbitrary order, which only matters for ties in the search cell.
test = []
test.append(list(distinct_permutations([0, 1, 0, 0, 2, 0, 3, 0, 0, 3, 2])))
test.append(list(distinct_permutations([3, 0, 0, 2, 2, 0, 0, 1, 0, 0, 0])))
test.append(list(distinct_permutations([0, 0, 1, 0, 0, 3, 0, 0, 3, 0, 2])))
#test.append(list(distinct_permutations([0, 0, 1, 0, 0, 3, 3, 0, 3, 0, 2])))
test.append(list(distinct_permutations([0, 0, 3, 0, 1, 0, 2, 0, 0, 0, 0])))
#test.append(list(distinct_permutations([1, 2, 0, 1, 2, 1, 0, 2, 3, 0, 0])))

In [None]:
# Confusion Matrix of the true classes (rows) against the relabelled predictions (columns).
# The label set is pinned to every class index so the matrix always has one row/column per
# entry in `classes`, even when a class is missing from both y_test and uy_pred; otherwise
# the matrix would be smaller than the list of display labels.
cm = metrics.confusion_matrix(y_test, uy_pred, labels=list(range(len(classes))))

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp = disp.plot()

#plt.savefig('confusion_matricies/c4_f113_km' + str(NUMBER_OF_CLUSTERS) + '+sw24_12+rae.png') # number of classes, features, method
plt.show()