In [1]:
import pandas as pd
import os


In [2]:
# Layout of the recorded dataset (shared by the EEG and EMG folders).
N_SUBJECTS = 11
# ^ a folder for each subject
INVALID_SUBJECTS = [6, 7]
# ^ these subjects have missing data and will be ignored

N_GESTURES = 7
# ^ number of gestures (which we will try to classify)
N_REPETITIONS = 6
# ^ how many times each gesture is repeated
N_TRIALS = N_REPETITIONS * N_GESTURES
# ^ 6 repetitions of 7 gestures = 42, the number of csv files in each subject folder

In [3]:
# import data from csv files into a list of dataframes
def import_data(path, transpose=False, subjects=None):
    """Load every file of every subject folder under ``path`` as a DataFrame.

    Folders are expected to be named ``subject_<n>`` directly inside ``path``.
    Within each folder the files are read in sorted (lexicographic) filename
    order, so the position of a recording in the returned list is
    reproducible across runs and machines.

    Parameters
    ----------
    path : str
        Directory that contains the ``subject_<n>`` folders.
    transpose : bool, default False
        If True, each dataframe is transposed right after it is read
        (used by the caller for the EEG data).
    subjects : iterable of int, optional
        Subject numbers to load. When omitted, the subjects are
        ``range(1, N_SUBJECTS)`` minus ``INVALID_SUBJECTS``.

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per file, subject by subject. The columns of every
        dataframe are renamed to ``channel_1 .. channel_k``.

    Raises
    ------
    AssertionError
        If an entry inside a subject folder is not a regular file.
    """
    if subjects is None:
        # NOTE(review): range() excludes its upper bound, so this default covers
        # subject_1 .. subject_{N_SUBJECTS - 1}. Confirm whether a folder
        # subject_{N_SUBJECTS} exists and should be loaded as well.
        subjects = [i for i in range(1, N_SUBJECTS) if i not in INVALID_SUBJECTS]

    dataframes = []
    # iterate over all requested subjects
    for subject in subjects:
        folder = os.path.join(path, f'subject_{subject}')
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # index -> file mapping deterministic (and identical for two folder
        # trees that use the same filenames).
        for file in sorted(os.listdir(folder)):
            file_path = os.path.join(folder, file)
            # ensure that the entry is a regular file and not a folder
            assert os.path.isfile(file_path), f"File {file_path} is invalid!"
            dataframe = pd.read_csv(file_path)
            if transpose:
                dataframe = dataframe.transpose()
            # Always rename the columns to "channel_X" so that the column
            # names are the same for all dataframes.
            dataframe.columns = [f"channel_{i+1}" for i in range(len(dataframe.columns))]
            dataframes.append(dataframe)

    return dataframes

# EEG files are transposed after reading; the loader then names the columns channel_X.
eeg_data = import_data(path='original EEG Data', transpose=True)
# EMG files are used in the orientation they are stored in.
emg_data = import_data(path='original EMG Data', transpose=False)


In [6]:
# Quick look at the raw data: how many recordings were loaded per modality,
# plus the schema (columns, dtypes, row count) of the first recording of each.

# --- EEG ---
print(f"Number of EEG dataframes: {len(eeg_data)}")
eeg_data[0].info()

# --- separator line, then EMG ---
print(
    "\n-----------------------------------\n",
    f"Number of EMG dataframes: {len(emg_data)}",
    sep="\n",
)
emg_data[0].info()

Number of EEG dataframes: 336
<class 'pandas.core.frame.DataFrame'>
Index: 1349 entries, 0 to 1348
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   channel_1  1349 non-null   float64
 1   channel_2  1349 non-null   float64
 2   channel_3  1349 non-null   float64
 3   channel_4  1349 non-null   float64
 4   channel_5  1349 non-null   float64
 5   channel_6  1349 non-null   float64
 6   channel_7  1349 non-null   float64
 7   channel_8  1349 non-null   float64
dtypes: float64(8)
memory usage: 94.9+ KB

-----------------------------------

Number of EMG dataframes: 336
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1592 entries, 0 to 1591
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   channel_1  1592 non-null   int64
 1   channel_2  1592 non-null   int64
 2   channel_3  1592 non-null   int64
 3   channel_4  1592 non-null   int64
 4   channel_5  1592 non-