In [1]:
import tensorflow.compat.v1 as tf
# Switch the TF1-compat API to eager execution and ask TensorFlow to log the
# device each operation is placed on.
tf.enable_eager_execution(config=tf.ConfigProto(log_device_placement=True))

# Last expression of the cell, so the notebook displays its return value.
tf.test.gpu_device_name()

''

In [2]:
# Load accelData from pickle

import pickle
import numpy as np
import matplotlib.pyplot as plt

# open accelData pickle
import pickle
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
with open('drive/MyDrive/Pickles/accelData.pickle', 'rb') as handle:
    accelData = pickle.load(handle)

# Keep the untouched trials (time column included) under their own key.
# list(...) creates a separate container: a plain assignment would alias the
# same object, so replacing its elements below would also strip the time
# column from 'Measurements with Time'.
accelData['Measurements with Time'] = list(accelData['Measurements'])

# Drop the first column of every trial (assumed to be the time column --
# TODO confirm against the pickle's schema). A new list is built instead of
# mutating in place, so the originals above stay intact.
accelData['Measurements'] = [
    trial.iloc[:, 1:] for trial in accelData['Measurements with Time']
]

In [8]:
# Create a window generator function to save memory
def window_generator_old(data, labels, windowSize, stride, batch_size=128):
    """Legacy batch generator that windows and normalises trials on the fly.

    Runs forever. On every pass the trials are visited in a fresh random
    order; each trial is cut into windows with ``createWindows``, every window
    is shifted by ``np.mean(window)`` and scaled by ``np.std(window)``, and a
    batch is yielded as soon as ``batch_size`` windows have been collected.
    Windows left over at the end of a pass are discarded.

    Args:
        data: Indexable collection of trials.
        labels: Indexable collection with one label per trial.
        windowSize: Window length forwarded to ``createWindows``.
        stride: Step between window starts forwarded to ``createWindows``.
        batch_size: Number of windows per yielded batch.

    Yields:
        Tuple ``(features, labels)`` of numpy arrays holding one batch.
    """
    while True:  # endless: the consumer decides how many batches to draw
        xs, ys = [], []
        trial_order = np.random.permutation(len(data))
        for trial_idx in trial_order:
            print(f'Processing trial {trial_idx} of {len(data)}')
            for win in createWindows(data[trial_idx], windowSize, stride):
                # Standardise the window before queueing it.
                centered = win - np.mean(win)
                xs.append(centered / np.std(win))
                ys.append(labels[trial_idx])

                if len(xs) != batch_size:
                    continue

                print('Yielding batch')
                features = np.array(xs)
                print(features.shape)
                yield features, np.array(ys)
                # Start collecting the next batch from scratch.
                xs, ys = [], []

def window_generator(data, labels, windowSize, stride, batch_size=128):
    """Yield shuffled ``(windows, labels)`` batches forever.

    ``data`` is expected to already contain the individual windows. On every
    pass the windows are visited in a fresh random order and a batch is
    yielded each time ``batch_size`` of them have been collected. Windows left
    over at the end of a pass are carried into the next pass, so nothing is
    dropped and every batch has exactly ``batch_size`` items, even when
    ``len(data) < batch_size``.

    Args:
        data: Indexable collection of windows (``data[i]``).
        labels: Indexable collection with one label per window (``labels[i]``).
        windowSize: Unused; kept so existing call sites keep working.
        stride: Unused; kept so existing call sites keep working.
        batch_size: Number of windows per yielded batch.

    Yields:
        Tuple ``(np.array(windows), np.array(labels))`` for one batch.

    Raises:
        ValueError: On first iteration, if ``data`` is empty or ``batch_size``
            is smaller than 1 (either would otherwise spin forever without
            ever yielding).
    """
    n_items = len(data)
    if n_items == 0:
        raise ValueError('window_generator needs at least one window')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    batch_windows = []
    batch_labels = []
    while True:  # endless: the consumer decides how many batches to draw
        for i in np.random.permutation(n_items):
            batch_windows.append(data[i])
            batch_labels.append(labels[i])

            if len(batch_windows) == batch_size:
                yield np.array(batch_windows), np.array(batch_labels)
                # Reset BOTH accumulators; resetting only the labels would
                # leave the window list growing past batch_size, so the
                # equality check above would never fire again in this pass.
                batch_windows = []
                batch_labels = []


def vectorized_normalization(windows):
    """Standardise a batch of windows along axis 1.

    The mean and standard deviation are computed along axis 1 with
    ``keepdims=True`` and broadcast back over the input, so each slice along
    that axis ends up with zero mean and unit standard deviation.

    Args:
        windows: numpy array with at least two dimensions (presumably
            ``(n_windows, window_length, n_channels)`` -- TODO confirm).

    Returns:
        Array of the same shape as ``windows`` holding the normalised values.
        Slices that are constant along axis 1 (zero standard deviation) are
        returned as zeros instead of NaN.
    """
    means = windows.mean(axis=1, keepdims=True)
    stds = windows.std(axis=1, keepdims=True)

    # A constant slice has std == 0; dividing by it would produce NaN (0/0).
    # Dividing by 1 instead leaves the already-centred zeros untouched.
    safe_stds = np.where(stds == 0, 1.0, stds)

    return (windows - means) / safe_stds


from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, Dropout, MaxPooling1D, Normalization
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau


# class weight import
from sklearn.utils import class_weight

def build_model(input_shape, num_classes, learning_rate=0.01):
    """Build and compile the 1D-CNN window classifier.

    Architecture: Normalization -> Conv1D(32, kernel 3, relu) ->
    MaxPooling1D(2) -> Flatten -> Dense(64, relu) -> Dropout(0.5) ->
    Dense(num_classes, softmax).

    The Normalization layer is created un-adapted; the caller is expected to
    call ``model.layers[0].adapt(...)`` before training.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential([
        # Only the first layer needs the input shape; later layers infer it.
        Normalization(input_shape=input_shape),
        Conv1D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  run_eagerly=False)
    return model




def createWindows(data, windowSize, stride):
    """Cut ``data`` into full-length sliding windows.

    Window starts are ``0, stride, 2*stride, ...`` and every start for which a
    complete window of ``windowSize`` items fits is used, including the one
    that ends exactly at the end of ``data``.

    Args:
        data: Sliceable sequence supporting ``len()`` and ``data[a:b]``.
        windowSize: Number of items per window; must be >= 1.
        stride: Distance between consecutive window starts; must be >= 1.

    Returns:
        List of slices ``data[start:start + windowSize]``. Empty when
        ``data`` is shorter than ``windowSize``.

    Raises:
        ValueError: If ``windowSize`` or ``stride`` is smaller than 1.
    """
    if windowSize < 1 or stride < 1:
        raise ValueError('windowSize and stride must be positive integers')

    # Largest start index at which a full window still fits; the +1 below
    # makes the range bound inclusive so that final window is not dropped.
    last_start = len(data) - windowSize
    return [data[start:start + windowSize]
            for start in range(0, last_start + 1, stride)]
    
def _windows_with_labels(trials, labels, windowSize, stride):
    """Cut every trial into windows and repeat its label once per window."""
    windows = []
    window_labels = []
    # zip() pairs items by position, so this also works when the inputs are
    # pandas Series whose index was shuffled by train_test_split (where
    # ``labels[i]`` would look up by index label instead of position).
    for trial, label in zip(trials, labels):
        trial_windows = createWindows(trial, windowSize, stride)
        windows.extend(trial_windows)
        window_labels.extend(np.full(len(trial_windows), label).tolist())
    return windows, window_labels


def batchWindowClassification(trials, labels, windowSize, stride, batch_size=100, verbose=False):
    """Train and evaluate the window classifier on a trial-level split.

    Trials are split 80/20 into train/test and the training part again 80/20
    into train/validation (both with ``random_state=42``). Each trial is cut
    into windows that inherit the trial's label, a model from ``build_model``
    is trained on shuffled training windows for 10 epochs of 100 steps, and
    the model is then evaluated on the test windows. A histogram and the
    counts of the predicted labels are shown, and accuracy and loss are
    printed.

    Args:
        trials: Collection of trials; each trial must be sliceable by
            ``createWindows`` (presumably a DataFrame/array of shape
            ``(n_samples, n_channels)`` -- TODO confirm).
        labels: One label per trial, in the same order as ``trials``.
        windowSize: Number of samples per window.
        stride: Distance between consecutive window starts.
        batch_size: Number of windows per batch for training and testing;
            capped at the number of available windows.
        verbose: If True, also plot and print the label distributions of the
            train and test splits and print the number of classes.

    Returns:
        Tuple ``(loss, accuracy, model)`` from the test-set evaluation.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if the training or
            test split yields no windows (e.g. every trial is shorter than
            ``windowSize``).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # Split the data into training, validation and testing sets
    X_train, X_test, y_train, y_test = train_test_split(trials, labels, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    if verbose:
        # Histograms of the label distribution in the train and test splits
        plt.hist(y_train)
        plt.title('Distribution of labels in training set')
        plt.show()

        plt.hist(y_test)
        plt.title('Distribution of labels in testing set')
        plt.show()

        print(y_train)
        print(y_test)

    # Fit the encoder on all three splits: a label that only occurs in the
    # validation split would otherwise make encoder.transform() fail.
    encoder = LabelEncoder()
    encoder.fit(np.concatenate((y_train, y_val, y_test)))
    num_classes = len(encoder.classes_)
    if verbose:
        print(f"Number of unique classes: {num_classes}")

    # Cut every split into windows; each window inherits its trial's label.
    # The test windows are built up front so an unusable configuration is
    # reported before any time is spent on training.
    trainWindows, trainWindowLabels = _windows_with_labels(X_train, y_train, windowSize, stride)
    valWindows, valWindowLabels = _windows_with_labels(X_val, y_val, windowSize, stride)
    testWindows, testWindowLabels = _windows_with_labels(X_test, y_test, windowSize, stride)

    print(len(trainWindows))

    if not trainWindows or not testWindows:
        raise ValueError(
            f'windowSize={windowSize} produced no windows for the training '
            'or test split; use a smaller windowSize'
        )

    # One-hot encode the window labels
    encoded_trainWindowLabels = to_categorical(encoder.transform(trainWindowLabels), num_classes=num_classes)
    encoded_valWindowLabels = to_categorical(encoder.transform(valWindowLabels), num_classes=num_classes)
    encoded_testWindowLabels = to_categorical(encoder.transform(testWindowLabels), num_classes=num_classes)

    # Never request a batch larger than the available windows; a generator
    # that can never fill a batch would not yield anything.
    train_batch_size = min(batch_size, len(trainWindows))
    test_batch_size = min(batch_size, len(testWindows))

    # Creating generators for training and validation
    train_gen = window_generator(trainWindows, encoded_trainWindowLabels, windowSize, stride, batch_size=train_batch_size)
    # NOTE: val_gen, class_weights_dict and reduce_lr below are prepared but
    # deliberately not passed to model.fit(), matching the current experiment
    # setup. Because val_gen never terminates, enabling
    # validation_data=val_gen also requires validation_steps.
    val_gen = window_generator(valWindows, encoded_valWindowLabels, windowSize, stride, batch_size=batch_size)

    # Balanced class weights, keyed by the encoder's class index so they line
    # up with the one-hot columns even if a class is missing from y_train.
    train_classes = np.unique(y_train)
    class_weights = class_weight.compute_class_weight('balanced', classes=train_classes, y=y_train)
    class_weights_dict = {
        int(idx): float(weight)
        for idx, weight in zip(encoder.transform(train_classes), class_weights)
    }

    # Derive the model's input/output sizes from the data instead of
    # hard-coding them, so they always agree with the encoded labels.
    n_channels = np.shape(trainWindows[0])[1]
    model = build_model((windowSize, n_channels), num_classes)

    # Adapt the Normalization layer with one batch from the training set
    adapt_batch_size = min(128, len(trainWindows))
    X_batch, _ = next(window_generator(trainWindows, encoded_trainWindowLabels, windowSize, stride, batch_size=adapt_batch_size))
    model.layers[0].adapt(X_batch)

    # Instantiate the learning-rate callback (currently not passed to fit)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

    model.fit(train_gen,
        steps_per_epoch=100,
        epochs=10,
        verbose=1)

    # Creating generator for testing
    test_gen = window_generator(testWindows, encoded_testWindowLabels, windowSize, stride, batch_size=test_batch_size)
    test_steps = max(1, len(testWindows) // test_batch_size)  # Ensure at least 1 step

    # Evaluate the model
    loss, accuracy = model.evaluate(test_gen, steps=test_steps, verbose=1)

    # NOTE: predict() draws fresh, independently shuffled batches from the
    # same generator, so these predictions are not aligned with any label
    # order; only their distribution is reported.
    predictions = model.predict(test_gen, steps=test_steps, verbose=1)

    # Transform predictions back to the original labels
    predictions = np.argmax(predictions, axis=1)
    predictions = encoder.inverse_transform(predictions)

    # Histogram of the predicted labels
    plt.hist(predictions)
    plt.title('Distribution of predictions')
    plt.show()

    # Counts per predicted label
    print(np.unique(predictions, return_counts=True))

    print(f'Accuracy: {accuracy}')
    print(f'Loss: {loss}')

    return loss, accuracy, model



In [9]:
# Run batchWindowClassification for several window sizes, pairing each one
# with a stride of half the window size.
# The batch size is held at 100 for every run and verbose output is off.

windowSizes = [100, 1000, 5000, 10000]
strides = [50, 500, 2500, 5000]
allResults = []

# Iterate sizes and strides together so `stride` is actually defined when the
# results dict below is built.
for windowSize, stride in zip(windowSizes, strides):

    print(f'windowSize: {windowSize}, stride: {stride}')
    loss, accuracy, model = batchWindowClassification(accelData['Measurements'], accelData['Abnormal'], windowSize, stride, batch_size=100, verbose=False)

    # Keep the configuration next to its outcome for later comparison
    results = {
        'windowSize': windowSize,
        'stride': stride,
        'loss': loss,
        'accuracy': accuracy,
        'model': model
    }
    allResults.append(results)



windowSize: 100, stride: 50.0
226197
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 1/10
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10