# Lezen van de files

Importeren libraries

In [30]:
import h5py
import os
import numpy as np
import random

Hier 1 class van maken, doet:
* lezen van de data
* Specificeren van de folder waarvan je de data wil inlezen in folder

## Class voor all preprocess stappen

In [31]:
"""
Deze class zorgt ervoor dat de data wordt geladen , de folder moet gespecificeerd worden bij het aanroepen. 
Het geeft een matrix terug met sensors als rows en timestamps als columns, en de labels (de tasks)
"""
class DataLoader:
    def __init__(self, base_directory='Final_Project_data/'):
        self.base_directory = base_directory

    def get_dataset_name(self, file_name_with_dir):
        filename_without_dir = file_name_with_dir.split('/')[-1]
        temp = filename_without_dir.split('_')[:-1]
        dataset_name = "_".join(temp)
        return dataset_name

    def znorm(self, data):
        """
        Normalizes time-wise
        """
        mean_rows = np.mean(data, axis=1, keepdims=True)
        std_rows = np.std(data, axis=1, keepdims=True)
        scaled_data = ((data - mean_rows) / std_rows)
        return scaled_data

    def load_data_from_folder(self, folder, shuffle=True, downsample_factor=4): # hier kan de downsampling factor veranderd worden
        data_directory = os.path.join(self.base_directory, folder)
        data = []
        labels = []

        label_mapping = {
            'rest': 0,
            'task_motor': 1,
            'task_story_math': 2,
            'task_working_memory': 3
        }

        file_names = [file_name for file_name in os.listdir(data_directory) if file_name.endswith(".h5")]
        if shuffle:
            random.shuffle(file_names)

        for file_name in file_names:
            file_path = os.path.join(data_directory, file_name)
            with h5py.File(file_path, 'r') as f:
                dataset_name = self.get_dataset_name(file_name)
                matrix = f.get(dataset_name)[()]

                label = None
                for task_prefix in label_mapping.keys():
                    if task_prefix in file_name:
                        label = task_prefix
                        break

                if label is not None:
                    matrix = self.znorm(matrix)
                    matrix = matrix[:, ::downsample_factor]

                    one_hot_label = np.zeros(len(label_mapping))
                    one_hot_label[label_mapping[label]] = 1
                    data.append(matrix)
                    labels.append(one_hot_label)
                else:
                    print(f"Warning: No label found for file {file_name}")

        return np.array(data), np.array(labels)
        
data_loader = DataLoader()


### Inlezen Cross

In [32]:
# CROSS

# Load data and labels for each subset
data_train, labels_train = data_loader.load_data_from_folder('Cross/train')
data_test1, labels_test1 = data_loader.load_data_from_folder('Cross/test1')
data_test2, labels_test2 = data_loader.load_data_from_folder('Cross/test2')
data_test3, labels_test3 = data_loader.load_data_from_folder('Cross/test3')

# Print shapes of loaded data
print(f"Train Data Shape: {data_train.shape}, Train Labels Shape: {labels_train.shape}")
print(f"Test1 Data Shape: {data_test1.shape}, Test1 Labels Shape: {labels_test1.shape}")
print(f"Test2 Data Shape: {data_test2.shape}, Test2 Labels Shape: {labels_test2.shape}")
print(f"Test3 Data Shape: {data_test3.shape}, Test3 Labels Shape: {labels_test3.shape}")

Train Data Shape: (64, 248, 8906), Train Labels Shape: (64, 4)
Test1 Data Shape: (16, 248, 8906), Test1 Labels Shape: (16, 4)
Test2 Data Shape: (16, 248, 8906), Test2 Labels Shape: (16, 4)
Test3 Data Shape: (16, 248, 8906), Test3 Labels Shape: (16, 4)


### Inlezen Intra

In [34]:
# intra train

# Load the preprocessed data and labels
data_train, labels_train = data_loader.load_data_from_folder('Intra/train')

# Print shapes of loaded data
print(f"Data Shape: {data_train.shape}")
print(f"Labels Shape: {labels_train.shape}")

Data Shape: (32, 248, 8906)
Labels Shape: (32, 4)


In [35]:
## Test intra
# Load the preprocessed data and labels
data_test, labels_test = data_loader.load_data_from_folder('Intra/test')

# Print shapes of loaded data
print(f"Data Shape: {data_test.shape}")
print(f"Labels Shape: {labels_test.shape}")

Data Shape: (8, 248, 8906)
Labels Shape: (8, 4)


### Trainen + Maken van het model (intra)

In [36]:
# reshapen zodat het in de vorm: [nr samples, time steps, features] is, voor LSTM
X_train = data_train
X_test = data_test

X_train = X_train.reshape((X_train.shape[0], X_train.shape[2], X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[2], X_test.shape[1]))

In [37]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf

def create_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(4, activation='softmax'))  # 4 classes

    model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [38]:
y_train = labels_train
y_test = labels_test

In [39]:
model = create_model((X_train.shape[1], X_train.shape[2]))
history = model.fit(X_train, y_train, epochs=4, batch_size=64, validation_data=(X_test, y_test))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [40]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 37.50%
