### Installing dependencies

                                                    Project details                                                                
                                 
The data represents various brain activities: resting, math & story tasks, working memory, and motor tasks.

    The 'Intra' folder contains data from one subject, while the 'Cross' folder includes multiple subjects.

Each file is a matrix of shape 248 x 35624, where 248 represents the number of sensors, and 35624 represents time steps.

The files have the following format: “taskType_subjectIdentifier_number.h5”
where taskType can be rest, task_motor, task_story_math, and task_working_memory.

In practice, these tasks correspond to the activities performed by the subjects:

    • Resting Task
Recording the subjects’ brain while in a relaxed resting
state.

    • Math & Story Task
Subject performs mental calculation and language
processing task.

    • Working Memory task
Subject performs a memorization task.

    • Motor Task
Subject performs a motor task, typically moving fingers
or feet.

In [58]:
#pip install h5py

In [59]:
import h5py
import torch

Reading data:

In [60]:
def get_dataset_name(file_name_with_dir):
    """Derive the HDF5 dataset name from the path of an ``.h5`` file.

    Everything up to and including the last ``/`` is discarded, then the
    last underscore and whatever follows it are removed, e.g.
    ``"Intra/train/task_motor_105923_1.h5"`` -> ``"task_motor_105923"``.
    A file name containing no underscore yields an empty string.
    """
    base_name = file_name_with_dir.rpartition('/')[2]
    # rpartition gives an empty head when there is no underscore at all
    return base_name.rpartition('_')[0]

In [61]:
# Sanity check: load one recording and inspect its type, shape and values.
filename_path = "Final Project data/Intra/train/task_motor_105923_1.h5"
with h5py.File(filename_path, 'r') as f:
    dataset_name = get_dataset_name(filename_path)
    # Index the file directly: a missing dataset then raises a KeyError that
    # names it, whereas f.get() returns None and the following [()] fails
    # with an unhelpful "'NoneType' object is not subscriptable".
    matrix = f[dataset_name][()]  # [()] reads the dataset into a numpy array
    print(type(matrix))

    print(matrix.shape)
    print(matrix)

<class 'numpy.ndarray'>
(248, 35624)
[[ 8.42330329e-13  8.52551645e-13  9.59410828e-13 ...  4.99454644e-13
   5.51955020e-13  3.90272504e-13]
 [ 4.21169474e-13  3.46406714e-13  2.44710773e-13 ...  6.50103723e-13
   6.52889906e-13  6.67285237e-13]
 [ 3.71812580e-13  3.14581044e-13  2.56960170e-13 ...  5.14697714e-13
   5.98014368e-13  6.00739673e-13]
 ...
 [ 2.44728771e-13  3.53496583e-13  3.49771671e-13 ... -2.21629925e-13
  -2.18649981e-13 -1.92575502e-13]
 [ 1.59606714e-12  1.63935792e-12  1.61163661e-12 ...  1.73049368e-12
   1.71454441e-12  1.63327934e-12]
 [ 2.71291193e-12  2.74663170e-12  2.71329508e-12 ...  8.00468903e-13
   9.05076957e-13  9.84778340e-13]]


In [62]:
# Data Preprocessing

# min-max scaling
def minmax(trial):
    """Rescale ``trial`` linearly so its minimum maps to 0 and its maximum to 1.

    The minimum and maximum are taken over the whole array (every value of
    ``trial`` together), not per row.

    Args:
        trial: array exposing ``.min()``, ``.max()`` and element-wise
            arithmetic (e.g. a numpy array).

    Returns:
        A new array with the same shape as ``trial``. A constant input
        (maximum equal to minimum) yields all zeros instead of NaN.
    """
    lowest = trial.min()
    value_range = trial.max() - lowest
    if value_range == 0:
        # Constant input: dividing by the zero range would give 0/0 = NaN.
        value_range = 1
    return (trial - lowest) / value_range

# Z-score normalisation (optional alternative to min-max scaling)
def zscore(trial):
    """Standardise ``trial`` to zero mean and unit standard deviation.

    The mean and standard deviation are computed over the whole array (every
    value of ``trial`` together), not per row.

    Args:
        trial: array exposing ``.mean()``, ``.std()`` and element-wise
            arithmetic (e.g. a numpy array).

    Returns:
        A new array with the same shape as ``trial``. A constant input
        (standard deviation of zero) yields all zeros instead of NaN.
    """
    centred = trial - trial.mean()
    spread = trial.std()
    if spread == 0:
        # Constant input: avoid 0/0 and return the (all-zero) centred values.
        spread = 1
    return centred / spread

# keeps every `factor`-th time step (column) of a trial
def downsample(trial, factor):
    """Return ``trial`` restricted to every ``factor``-th column, starting at column 0.

    All rows are kept; only the second axis is thinned out.
    """
    every_nth_column = slice(None, None, factor)
    return trial[:, every_nth_column]

#Memory management during training #TODO

In [71]:
# Quick manual check of the preprocessing helpers on the loaded matrix.

# Thin the recording to every 30th time step and show the resulting shape.
ds = downsample(matrix, 30)
print(ds.shape)

# Show the raw values, then the maximum and contents of the scaled copy.
print(matrix)
normalisedMatrix = minmax(matrix)
print(normalisedMatrix.max(), normalisedMatrix, sep="\n")


(248, 1188)
[[ 1.50533313e-13  4.02349188e-13  5.51022769e-13 ...  3.80976527e-13
   4.92481706e-13  4.49273458e-13]
 [-5.69284528e-13 -7.67568355e-13 -9.13843328e-13 ... -1.95356231e-12
  -2.24889833e-12 -2.33434149e-12]
 [ 2.40841418e-12  2.45279773e-12  2.50613592e-12 ...  2.18648125e-12
   2.21568098e-12  2.27213387e-12]
 ...
 [ 2.27254392e-12  2.32059554e-12  2.19059558e-12 ...  2.67223375e-12
   2.62604478e-12  2.61077249e-12]
 [-3.08351055e-13 -1.77339561e-13 -6.60747227e-14 ... -5.12656378e-13
  -5.44175111e-13 -6.66832258e-13]
 [ 4.55521862e-12  4.64909751e-12  4.74642504e-12 ...  5.11770574e-12
   5.17173413e-12  5.17326676e-12]]
1.0
[[0.95200887 0.95280086 0.95326846 ... 0.95273364 0.95308434 0.95294844]
 [0.94974494 0.94912131 0.94866126 ... 0.9453912  0.94446233 0.9441936 ]
 [0.95911021 0.9592498  0.95941756 ... 0.9584122  0.95850404 0.95868159]
 ...
 [0.95868288 0.95883401 0.95842514 ... 0.95993996 0.95979469 0.95974666]
 [0.95056561 0.95097766 0.9513276  ... 0.94992304 0

In [69]:
# Load every recording in Cross/train: one preprocessed matrix and one label per file
import os
cross_data_train = [] # Store data
cross_data_train_labels = [] # Store labels (based on filename)

path = 'Final Project data/Cross/train'
# Keep only .h5 recordings (skips stray files such as OS metadata) and sort
# them, because os.listdir returns entries in arbitrary order.
files = sorted(name for name in os.listdir(path) if name.endswith('.h5'))

for file in files:
    file_path = f'{path}/{file}'

    with h5py.File(file_path, 'r') as h5_file:
        dataset_name = get_dataset_name(file_path)
        # dataset_name is "<taskType>_<subjectIdentifier>"; drop the trailing
        # identifier to get the task label. Splitting on the character '1'
        # only works when the identifier begins with 1 and the task name
        # contains no 1.
        label = dataset_name.rpartition('_')[0]
        # Direct indexing raises a KeyError naming a missing dataset instead
        # of the opaque TypeError produced by .get(...) returning None.
        matrix = h5_file[dataset_name][()]
        normalisedMatrix = downsample(minmax(matrix), 30) # apply minmax normalisation and downsampling

        cross_data_train.append(normalisedMatrix)
        cross_data_train_labels.append(label)

print(cross_data_train)
print(cross_data_train_labels)
        
    
    


[array([[0.76688285, 0.7696216 , 0.77612789, ..., 0.78325926, 0.76991305,
        0.78553926],
       [0.57299467, 0.58447203, 0.56528285, ..., 0.56397655, 0.57809401,
        0.55630643],
       [0.71585343, 0.71634273, 0.73371268, ..., 0.72750332, 0.72029202,
        0.7254296 ],
       ...,
       [0.74821799, 0.75641031, 0.77623912, ..., 0.75772562, 0.7538468 ,
        0.75785937],
       [0.53221695, 0.55552249, 0.570908  , ..., 0.53101288, 0.51617279,
        0.52455866],
       [0.55612879, 0.56854628, 0.56171264, ..., 0.492424  , 0.48295324,
        0.48274687]]), array([[0.77069627, 0.75956546, 0.76242005, ..., 0.77826939, 0.78446858,
        0.78541656],
       [0.59003964, 0.60038755, 0.61076739, ..., 0.56689611, 0.56923822,
        0.5706648 ],
       [0.72393966, 0.72108341, 0.71526382, ..., 0.73754202, 0.73141002,
        0.73369502],
       ...,
       [0.75955245, 0.75205455, 0.7465635 , ..., 0.78728144, 0.7903216 ,
        0.787401  ],
       [0.56878557, 0.57137124, 0

In [65]:
# Code for CNN/RNN for cross_data here
import torch

                                       

In [66]:
# Load every recording in Intra/train: one preprocessed matrix and one label per file
intra_data_train = [] # Store data
intra_data_train_labels = [] # Store labels (based on filename)

path = 'Final Project data/Intra/train'
# Keep only .h5 recordings (skips stray files such as OS metadata) and sort
# them, because os.listdir returns entries in arbitrary order.
files = sorted(name for name in os.listdir(path) if name.endswith('.h5'))

for file in files:
    file_path = f'{path}/{file}'

    with h5py.File(file_path, 'r') as h5_file:
        dataset_name = get_dataset_name(file_path)
        # dataset_name is "<taskType>_<subjectIdentifier>"; drop the trailing
        # identifier to get the task label. Splitting on the character '1'
        # only works when the identifier begins with 1 and the task name
        # contains no 1.
        label = dataset_name.rpartition('_')[0]
        # Direct indexing raises a KeyError naming a missing dataset instead
        # of the opaque TypeError produced by .get(...) returning None.
        matrix = h5_file[dataset_name][()]
        normalisedMatrix = downsample(minmax(matrix), 30) # apply minmax normalisation and downsampling

        intra_data_train.append(normalisedMatrix)
        intra_data_train_labels.append(label)

print(intra_data_train)
print(intra_data_train_labels)
    
    


[array([[0.7259603 , 0.72553097, 0.7265026 , ..., 0.76914094, 0.76781908,
        0.77221401],
       [0.58915389, 0.58851024, 0.58275124, ..., 0.53959231, 0.53608045,
        0.52309441],
       [0.73295568, 0.72959469, 0.72292007, ..., 0.74283908, 0.75200365,
        0.75782618],
       ...,
       [0.74359708, 0.7439141 , 0.74223835, ..., 0.78613385, 0.78117001,
        0.79155966],
       [0.55450591, 0.55091375, 0.53624949, ..., 0.52038457, 0.53350669,
        0.52519571],
       [0.52985012, 0.52697086, 0.52845243, ..., 0.599689  , 0.59940946,
        0.60008036]]), array([[0.8382893 , 0.83633842, 0.83410143, ..., 0.86143941, 0.85747265,
        0.85591197],
       [0.69126016, 0.69519887, 0.70110044, ..., 0.66270103, 0.66650325,
        0.67294429],
       [0.82364095, 0.8136088 , 0.82911204, ..., 0.83531382, 0.83967867,
        0.82950483],
       ...,
       [0.84827566, 0.84600253, 0.84385453, ..., 0.88622574, 0.88113202,
        0.88401513],
       [0.69474368, 0.69079456, 0

In [67]:
# Code for CNN/RNN for intra_data here
