# Lezen van de files

Importeren libraries

In [1]:
import h5py
import os
import numpy as np
import random


Hier 1 class van maken, doet:
* lezen van de data
* Specificeren van de folder waarvan je de data wil inlezen in folder

## Class voor all preprocess stappen

In [2]:
import numpy as np
import os
import h5py
import random

class DataLoader:
    def __init__(self, base_directory=''):
        self.base_directory = base_directory

    def get_dataset_name(self, file_name_with_dir):
        filename_without_dir = file_name_with_dir.split('/')[-1]
        temp = filename_without_dir.split('_')[:-1]
        dataset_name = "_".join(temp)
        return dataset_name

    def znorm(self, data):
        """
        Normalizes time-wise
        """
        mean_rows = np.mean(data, axis=1, keepdims=True)
        std_rows = np.std(data, axis=1, keepdims=True)
        scaled_data = ((data - mean_rows) / std_rows)
        return scaled_data

    def load_data_from_folder(self, folder, shuffle=True, downsample_factor=4):
        data_directory = os.path.join(self.base_directory, folder)
        data = []
        labels = []

        label_mapping = {
            'rest': 0,
            'task_motor': 1,
            'task_story_math': 2,
            'task_working_memory': 3
        }

        file_names = [file_name for file_name in os.listdir(data_directory) if file_name.endswith(".h5")]
        if shuffle:
            random.shuffle(file_names)

        for file_name in file_names:
            file_path = os.path.join(data_directory, file_name)
            with h5py.File(file_path, 'r') as f:
                dataset_name = self.get_dataset_name(file_name)
                matrix = f.get(dataset_name)[()]

                label = None
                for task_prefix in label_mapping.keys():
                    if task_prefix in file_name:
                        label = label_mapping[task_prefix]  # Use the numerical value from label_mapping
                        break

                if label is not None:
                    matrix = self.znorm(matrix)
                    matrix = matrix[:, ::downsample_factor]

                    data.append(matrix)
                    labels.append(label)  # Append the label directly
                else:
                    print(f"Warning: No label found for file {file_name}")

        return np.array(data), np.array(labels)

# Example usage
data_loader = DataLoader()


### Inlezen Cross

In [3]:
# CROSS

# Load data and labels for each subset
data_train1, labels_train1 = data_loader.load_data_from_folder('./Final Project data/Cross/train')
data_test1, labels_test1 = data_loader.load_data_from_folder('./Final Project data/Cross/test1')
data_test2, labels_test2 = data_loader.load_data_from_folder('./Final Project data/Cross/test2')
data_test3, labels_test3 = data_loader.load_data_from_folder('./Final Project data/Cross/test3')

# Print shapes of loaded data
print(f"Train Data Shape: {data_train1.shape}, Train Labels Shape: {labels_train1.shape}")
print(f"Test1 Data Shape: {data_test1.shape}, Test1 Labels Shape: {labels_test1.shape}")
print(f"Test2 Data Shape: {data_test2.shape}, Test2 Labels Shape: {labels_test2.shape}")
print(f"Test3 Data Shape: {data_test3.shape}, Test3 Labels Shape: {labels_test3.shape}")

Train Data Shape: (64, 248, 8906), Train Labels Shape: (64,)
Test1 Data Shape: (16, 248, 8906), Test1 Labels Shape: (16,)
Test2 Data Shape: (16, 248, 8906), Test2 Labels Shape: (16,)
Test3 Data Shape: (16, 248, 8906), Test3 Labels Shape: (16,)


### Inlezen Intra

In [4]:
# intra train

# Load the preprocessed data and labels
data_train, labels_train = data_loader.load_data_from_folder('./Final Project data/Intra/train')

# Print shapes of loaded data
print(f"Data Shape: {data_train.shape}")
print(f"Labels Shape: {labels_train.shape}")

Data Shape: (32, 248, 8906)
Labels Shape: (32,)


In [5]:
## Test intra
# Load the preprocessed data and labels
data_test, labels_test = data_loader.load_data_from_folder('./Final Project data/Intra/test')

# Print shapes of loaded data
print(f"Data Shape: {data_test.shape}")
print(f"Labels Shape: {labels_test.shape}")

Data Shape: (8, 248, 8906)
Labels Shape: (8,)


### Trainen + Maken van het model (intra)

In [6]:
# reshapen zodat het in de vorm: [nr samples, time steps, features] is, voor LSTM
X_train = data_train1
X_test = data_test1
y_train = labels_train1
y_test = labels_test1

X_train = X_train.reshape((X_train.shape[0], X_train.shape[2], X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[2], X_test.shape[1]))

In [11]:
# https://medium.com/geekculture/10-hyperparameters-to-keep-an-eye-on-for-your-lstm-model-and-other-tips-f0ff5b63fcd4
# 
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import mean_squared_error

def create_model_oud(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(4, activation='softmax'))  # 4 classes
    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_model(input_shape):
    model = Sequential()
    model.add(LSTM(4, input_shape=input_shape))
    model.add(Dense(1))
    model.add(Dense(4, activation='softmax'))  # 4 classes
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.001), metrics=['accuracy'])
    return model

### LSTM met hyperparametertuning

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from kerastuner.tuners import RandomSearch


def build_model(hp):
    model = Sequential()

    # Tuning the number of LSTM layers and their units
    for i in range(hp.Int('num_lstm_layers', 1, 2)):
        model.add(LSTM(
            units=hp.Choice('units_' + str(i), values=[16]), #values=[16, 32, 64, 128]
            return_sequences=i < hp.get('num_lstm_layers') - 1,  # Only the last layer should not return sequences
            input_shape=(X_train.shape[1], X_train.shape[2]) if i == 0 else None))

    # Tuning the number of Dense layers and their units
    for i in range(hp.Int('num_dense_layers', 1, 3)):
        model.add(Dense(
            units=hp.Choice('dense_units_' + str(i), values=[16]), #values=[16, 32, 64, 128]
            activation='relu'))

    model.add(Dense(4, activation='softmax'))  # 4 classes

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2])), #values=[1e-2, 1e-3, 1e-4]
        loss='mean_squared_error',
        metrics=['accuracy']
    )
    return model

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=1, #5
    executions_per_trial=1, #3
    directory='./',
    project_name='hparam_tuning'
)

tuner.search_space_summary()

# Start the hyperparameter tuning
tuner.search(X_train, y_train, epochs=1, batch_size=64, validation_data=(X_test, y_test))

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. 
The optimal number of LSTM and Dense layers and their units can be reviewed in the best hyperparameters.
""")

# Build the model with the optimal hyperparameters and train it
model = tuner.hypermodel.build(best_hps)
model.fit(X_train, y_train, epochs=1, batch_size=64, validation_data=(X_test, y_test))


Trial 2 Complete [00h 00m 14s]
val_accuracy: 0.3125

Best val_accuracy So Far: 0.3125
Total elapsed time: 00h 00m 14s
INFO:tensorflow:Oracle triggered exit



The hyperparameter search is complete. 
The optimal number of LSTM and Dense layers and their units can be reviewed in the best hyperparameters.



<keras.callbacks.History at 0x24ecb74fd00>

In [12]:
model = create_model((X_train.shape[1], X_train.shape[2]))
history = model.fit(X_train, y_train, epochs=7, batch_size=64, validation_data=(X_test, y_test))


Epoch 1/7


KeyboardInterrupt: 

In [11]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 31.25%


In [32]:
y_test[0]

array([1., 0., 0., 0.])

In [13]:
# predict:
from sklearn.metrics import mean_squared_error
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)

#rmse 
#trainScore = np.sqrt(mean_squared_error(y_train[0], trainPredict[:,0]))
#print(trainScore)
testScore = np.sqrt(mean_squared_error(y_test, testPredict[:,0]))
print(testScore)

1.6780759269094447
