### Installing dependencies

                                                    Project details                                                                
                                 
The data represents various brain activities: resting, math & story tasks, working memory, and motor tasks.

    The 'Intra' folder contains data from one subject, while the 'Cross' folder includes multiple subjects.

Each file is a matrix of shape 248 x 35624, where 248 represents the number of sensors, and 35624 represents time steps.

The files have the following format: “taskType subjectIdentifier number.h5”
where taskType can be rest, task motor, task story math, and task working memory.

In practice, these tasks correspond to the activities performed by the subjects:

    • Resting Task
Recording the subjects’ brain while in a relaxed resting
state.

    • Math & Story Task
Subject performs mental calculation and language
processing task.

    • Working Memory task
Subject performs a memorization task.

    • Motor Task
Subject performs a motor task, typically moving fingers
or feets

In [2]:
#pip install h5py

In [20]:
import h5py
import torch
from torch import FloatTensor, LongTensor
from typing import Tuple, List, Callable, Optional

Reading data:

In [None]:
def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

In [None]:
filename_path = "Final Project data/Intra/train/task_motor_105923_1.h5"
with h5py.File(filename_path, 'r') as f:
    dataset_name = get_dataset_name(filename_path)
    matrix = f.get(dataset_name)[()]
    print(type(matrix))
    
    print(matrix.shape)
    print(matrix)

<class 'numpy.ndarray'>
(248, 35624)
[[ 8.42330329e-13  8.52551645e-13  9.59410828e-13 ...  4.99454644e-13
   5.51955020e-13  3.90272504e-13]
 [ 4.21169474e-13  3.46406714e-13  2.44710773e-13 ...  6.50103723e-13
   6.52889906e-13  6.67285237e-13]
 [ 3.71812580e-13  3.14581044e-13  2.56960170e-13 ...  5.14697714e-13
   5.98014368e-13  6.00739673e-13]
 ...
 [ 2.44728771e-13  3.53496583e-13  3.49771671e-13 ... -2.21629925e-13
  -2.18649981e-13 -1.92575502e-13]
 [ 1.59606714e-12  1.63935792e-12  1.61163661e-12 ...  1.73049368e-12
   1.71454441e-12  1.63327934e-12]
 [ 2.71291193e-12  2.74663170e-12  2.71329508e-12 ...  8.00468903e-13
   9.05076957e-13  9.84778340e-13]]


In [6]:
# Data Preprocessing

# min-max scaling
def minmax(trial):
    min = trial.min()
    max = trial.max()
    normalisedTrial = (trial - min)/(max-min)
    return normalisedTrial

#Z-score normalisation OPTIONAL
def zscore(trial):
    return

#downsamples data by totaltimesteps/factor
def downsample(trial, factor):
    ds_trial = trial[:,::factor]
    return ds_trial

#Memory management during training #TODO

In [7]:
#testing the functions

ds = downsample(matrix, 30)
print(ds.shape)

print(matrix)
normalisedMatrix = minmax(matrix)
print(normalisedMatrix.max())
print(normalisedMatrix)


(248, 1188)
[[ 8.42330329e-13  8.52551645e-13  9.59410828e-13 ...  4.99454644e-13
   5.51955020e-13  3.90272504e-13]
 [ 4.21169474e-13  3.46406714e-13  2.44710773e-13 ...  6.50103723e-13
   6.52889906e-13  6.67285237e-13]
 [ 3.71812580e-13  3.14581044e-13  2.56960170e-13 ...  5.14697714e-13
   5.98014368e-13  6.00739673e-13]
 ...
 [ 2.44728771e-13  3.53496583e-13  3.49771671e-13 ... -2.21629925e-13
  -2.18649981e-13 -1.92575502e-13]
 [ 1.59606714e-12  1.63935792e-12  1.61163661e-12 ...  1.73049368e-12
   1.71454441e-12  1.63327934e-12]
 [ 2.71291193e-12  2.74663170e-12  2.71329508e-12 ...  8.00468903e-13
   9.05076957e-13  9.84778340e-13]]
1.0
[[0.95226883 0.9523438  0.95312752 ... 0.94975411 0.95013916 0.94895334]
 [0.94917995 0.94863162 0.94788576 ... 0.950859   0.95087943 0.95098501]
 [0.94881795 0.9483982  0.9479756  ... 0.9498659  0.95047696 0.95049695]
 ...
 [0.94788589 0.94868362 0.9486563  ... 0.94446552 0.94448737 0.94467861]
 [0.9577969  0.9581144  0.95791109 ... 0.95878281 0

In [34]:
# Code for storing data in a folder into an array
import os
import numpy as np

def preprocess_files(files = None, downsampling = 30):
    label_to_int = {'rest': 0, 'task_motor': 1, 'task_story_math': 2, 'task_working_memory': 3}

    cross_data_train = [] # Store data
    cross_data_train_labels = [] # Store labels (based on filename)

    path = 'Final Project data/Cross/train'
    if files == None:
        files = os.listdir(path)

    for file in files:
        file_path = f'{path}/{file}'
        
        with h5py.File(file_path, 'r') as h5_file:
            dataset_name = get_dataset_name(file_path)
            label = dataset_name.split('1')[0].removesuffix('_')
            matrix = h5_file.get(dataset_name)[()]
            normalisedMatrix = downsample(minmax(matrix), downsampling) # apply minmax normalisation and downsampling
            print(type(normalisedMatrix))
            cross_data_train.append(normalisedMatrix.T) # Transpose
            cross_data_train_labels.append(label_to_int[label])
            
            #print(matrix.shape) # should be 248x35624
            #print(label) 
    return cross_data_train, cross_data_train_labels

path = 'Final Project data/Cross/train'
files = os.listdir(path)
current_samples = []
n = 8

# TODO define model

for i, file in enumerate(files):
    current_samples.append(file)
    if len(current_samples) == 8 or i == (len(files)-1):
        print(current_samples)
        X_train, y_train = preprocess_files(current_samples)
        current_samples = []
        # TODO do model fitting
        np_cross_train = np.array(X_train)
        print(np_cross_train.shape)
        print(type(np_cross_train))
        print(y_train)
    
    


['rest_113922_1.h5', 'rest_113922_2.h5', 'rest_113922_3.h5', 'rest_113922_4.h5', 'rest_113922_5.h5', 'rest_113922_6.h5', 'rest_113922_7.h5', 'rest_113922_8.h5']
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(8, 1188, 248)
<class 'numpy.ndarray'>
[0, 0, 0, 0, 0, 0, 0, 0]
['rest_164636_1.h5', 'rest_164636_2.h5', 'rest_164636_3.h5', 'rest_164636_4.h5', 'rest_164636_5.h5', 'rest_164636_6.h5', 'rest_164636_7.h5', 'rest_164636_8.h5']
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(8, 1188, 248)
<class 'numpy.ndarray'>
[0, 0, 0, 0, 0, 0, 0, 0]
['task_motor_113922_1.h5', 'task_motor_113922_2.h5', 'task_motor_113922_3.h5', 'task_motor_113922_4.h5', 'task_motor_113922_5.h5', 'task_motor_113922_6.h5', 'task_mot

In [23]:
# Code for RNN for cross_data here
# https://pytorch.org/docs/stable/generated/torch.nn.RNN.html
#import torch

#torch.Tensor(cross_data_train)

import torch
from torch import nn
from torch import FloatTensor
from typing import Callable, Optional

class RNN(torch.nn.Module):
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        hidden_activation: str,
        output_activation: Callable[[FloatTensor], FloatTensor],
        device: str
    ) -> None:
        super().__init__()
        self.output_activation = output_activation
        self.device = device
        self.rnn = torch.nn.RNN(input_size=input_dim, hidden_size=hidden_dim, nonlinearity=hidden_activation)
        self.h_to_y = torch.nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, X:FloatTensor, h_0: Optional[FloatTensor]=None) -> FloatTensor:
        H, _ = self.rnn(X, h_0)
        linear = self.h_to_y(H)
        Y = self.output_activation(linear)
        return Y

# Example usage
input_dim = 248  # Number of features in each time step
hidden_dim = 128  # Number of hidden units in the RNN
output_dim = 4  # Number of classes for multiclass classification

# Create an instance of the RNN model
model = RNN(input_dim, hidden_dim, output_dim, 'tanh', nn.functional.softmax, 'cpu')

# Create a sample input sequence
input_sequence = torch.randn(5, 10, input_dim)  # (batch_size, sequence_length, input_dim)

# Forward pass through the model
output = model(input_sequence)

print("Input Shape:", input_sequence.shape)
print("Output Shape:", output.shape)
                                       

Input Shape: torch.Size([5, 10, 248])
Output Shape: torch.Size([5, 10, 10])


  Y = self.output_activation(linear)


In [11]:
intra_data_train = [] # Store data
intra_data_train_labels = [] # Store labels (based on filename)

path = 'Final Project data/Intra/train'
files = os.listdir(path)

for file in files:
    file_path = f'{path}/{file}'
    
    with h5py.File(file_path, 'r') as h5_file:
        dataset_name = get_dataset_name(file_path)
        label = dataset_name.split('1')[0].removesuffix('_')
        matrix = h5_file.get(dataset_name)[()]
        normalisedMatrix = downsample(minmax(matrix), 30)

        intra_data_train.append(normalisedMatrix)
        intra_data_train_labels.append(label_to_int[label])

        #print(matrix.shape) # should be 248x35624
        #print(label)
        
print(intra_data_train)
print(intra_data_train_labels)
    
    


[array([[0.7259603 , 0.72553097, 0.7265026 , ..., 0.76914094, 0.76781908,
        0.77221401],
       [0.58915389, 0.58851024, 0.58275124, ..., 0.53959231, 0.53608045,
        0.52309441],
       [0.73295568, 0.72959469, 0.72292007, ..., 0.74283908, 0.75200365,
        0.75782618],
       ...,
       [0.74359708, 0.7439141 , 0.74223835, ..., 0.78613385, 0.78117001,
        0.79155966],
       [0.55450591, 0.55091375, 0.53624949, ..., 0.52038457, 0.53350669,
        0.52519571],
       [0.52985012, 0.52697086, 0.52845243, ..., 0.599689  , 0.59940946,
        0.60008036]]), array([[0.8382893 , 0.83633842, 0.83410143, ..., 0.86143941, 0.85747265,
        0.85591197],
       [0.69126016, 0.69519887, 0.70110044, ..., 0.66270103, 0.66650325,
        0.67294429],
       [0.82364095, 0.8136088 , 0.82911204, ..., 0.83531382, 0.83967867,
        0.82950483],
       ...,
       [0.84827566, 0.84600253, 0.84385453, ..., 0.88622574, 0.88113202,
        0.88401513],
       [0.69474368, 0.69079456, 0

In [None]:
# Code for CNN/RNN for intra_data here
