### Installing dependencies

                                                    Project details                                                                
                                 
The data represents various brain activities: resting, math & story tasks, working memory, and motor tasks.

    The 'Intra' folder contains data from one subject, while the 'Cross' folder includes multiple subjects.

Each file is a matrix of shape 248 x 35624, where 248 represents the number of sensors, and 35624 represents time steps.

The files have the following format: “taskType subjectIdentifier number.h5”
where taskType can be rest, task motor, task story math, and task working memory.

In practice, these tasks correspond to the activities performed by the subjects:

    • Resting Task
Recording the subjects’ brain while in a relaxed resting
state.

    • Math & Story Task
Subject performs mental calculation and language
processing task.

    • Working Memory task
Subject performs a memorization task.

    • Motor Task
Subject performs a motor task, typically moving fingers
or feets

In [99]:
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
from torch import FloatTensor, LongTensor
from typing import Tuple, List, Callable, Optional
from sklearn.metrics import accuracy_score
import os
import numpy as np
from tqdm import tqdm

Reading data:

In [100]:
def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

In [101]:
# Data Preprocessing

# min-max scaling
def minmax(trial):
    min = trial.min()
    max = trial.max()
    normalisedTrial = (trial - min)/(max-min)
    return normalisedTrial

#Z-score normalisation OPTIONAL
def zscore(trial):
    mean = trial.mean()
    sd = trial.std()
    normalisedTrial = (trial - mean)/sd 
    return normalisedTrial

#downsamples data by totaltimesteps/factor
def downsample(trial, factor):
    ds_trial = trial[:,::factor]
    return ds_trial



In [102]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Forward pass through RNN
        rnn, _ = self.rnn(x)
        
        # Only take the output from the final time step
        output = self.fc(rnn[:, -1, :])
        return output
    

In [103]:
# Code for storing data in a folder into an array


def preprocess_files(files = None, path = 'Final Project data/Cross/train', downsampling = 30):
    label_to_int = {'rest': 0, 'task_motor': 1, 'task_story_math': 2, 'task_working_memory': 3}

    cross_data_train = [] # Store data
    cross_data_train_labels = [] # Store labels (based on filename)

    if files == None:
        files = os.listdir(path)

    for file in files:
        file_path = f'{path}/{file}'
        
        with h5py.File(file_path, 'r') as h5_file:
            # obtain labels
            dataset_name = get_dataset_name(file_path)
            label = dataset_name.split('_')
            label.remove(label[len(label)-1])
            label = '_'.join(label)
            cross_data_train_labels.append(label_to_int[label])
            
            # obtain X_data
            matrix = h5_file.get(dataset_name)[()]
            normalisedMatrix = downsample(zscore(matrix), downsampling) # apply minmax normalisation and downsampling
            cross_data_train.append(normalisedMatrix.T) # Transpose
             
    X = torch.from_numpy(np.array(cross_data_train)).float()
    y = torch.tensor(cross_data_train_labels)        
            
    return X, y

In [109]:
import random
random.seed = 123 # Set seed for reproducability
input_size = 248
hidden_size = 200
output_size = 4
network = RNN(input_size, hidden_size, output_size)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(network.parameters(), lr=0.001)

path = 'Final Project data/Cross/train'
files = os.listdir(path)
random.shuffle(files) # Shuffle order of files
current_samples = []
n = 8
batch_index = 1

for i, file in tqdm(enumerate(files)):
    current_samples.append(file)
    if len(current_samples) == n or i == (len(files)-1):
        print(f"training batch {batch_index}...")
        X_train, y_train = preprocess_files(current_samples, downsampling=1) 
        current_samples = []
        
        network.train()
        opt.zero_grad()
        output = network(X_train)
        loss = loss_fn(output, y_train)
        loss.backward()
        opt.step()
        
        batch_index += 1

0it [00:00, ?it/s]

training batch 1...


8it [00:32,  4.08s/it]

training batch 2...


16it [01:05,  4.06s/it]

training batch 3...


24it [01:38,  4.10s/it]

training batch 4...


32it [02:14,  4.29s/it]

training batch 5...


40it [02:52,  4.43s/it]

training batch 6...


48it [03:27,  4.44s/it]

training batch 7...


56it [04:02,  4.40s/it]

training batch 8...


64it [04:36,  4.32s/it]


In [119]:
# testing:
paths = [ 'Final Project data/Cross/test1',  'Final Project data/Cross/test2',  'Final Project data/Cross/test3']
for path in paths:
    files = os.listdir(path)
    print(files)
    X, y = preprocess_files(files, path, 1)
    network.eval()

    test_output = network(X).detach().numpy()
    pred = np.argmax(test_output, axis=1) # to numpy
    y = y.numpy()
    print(pred)
    print(accuracy_score(pred, y))


['rest_162935_1.h5', 'rest_162935_10.h5', 'rest_162935_3.h5', 'rest_162935_5.h5', 'task_motor_162935_1.h5', 'task_motor_162935_3.h5', 'task_motor_162935_4.h5', 'task_motor_162935_9.h5', 'task_story_math_162935_2.h5', 'task_story_math_162935_3.h5', 'task_story_math_162935_4.h5', 'task_story_math_162935_6.h5', 'task_working_memory_162935_3.h5', 'task_working_memory_162935_4.h5', 'task_working_memory_162935_5.h5', 'task_working_memory_162935_7.h5']
[0 0 0 0 3 3 3 3 2 1 2 2 3 3 3 3]
0.6875
['rest_707749_10.h5', 'rest_707749_4.h5', 'rest_707749_5.h5', 'rest_707749_7.h5', 'task_motor_707749_2.h5', 'task_motor_707749_7.h5', 'task_motor_707749_8.h5', 'task_motor_707749_9.h5', 'task_story_math_707749_10.h5', 'task_story_math_707749_2.h5', 'task_story_math_707749_5.h5', 'task_story_math_707749_6.h5', 'task_working_memory_707749_10.h5', 'task_working_memory_707749_4.h5', 'task_working_memory_707749_8.h5', 'task_working_memory_707749_9.h5']
[0 0 0 0 3 3 3 3 3 3 3 3 3 0 3 3]
0.4375
['rest_725751_10