In [15]:
import pandas as pd

In [16]:
# --- Dataset layout -------------------------------------------------------
# One folder per subject.
N_SUBJECTS = 11
# Subjects with missing data; they are skipped when loading.
INVALID_SUBJECTS = [6, 7]
# Number of gestures (the classes we will try to classify).
N_GESTURES = 7
# How many times each gesture is repeated.
N_REPETITIONS = 6
# CSV files per subject folder: every gesture recorded N_REPETITIONS times (6 * 7 = 42).
N_TRIALS = N_REPETITIONS * N_GESTURES

In [17]:
import os
import pandas as pd
from glob import glob

# Subject folders to scan. N_SUBJECTS counts the subject folders and numbering
# starts at 1, so the stop value must be N_SUBJECTS + 1 (range() excludes its
# stop); otherwise the last subject is silently dropped.
directories = [
    f'subject_{i}'
    for i in range(1, N_SUBJECTS + 1)
    if i not in INVALID_SUBJECTS
]

# Per-gesture lists of trial dataframes, keyed by the gesture token parsed
# from each file name.
frames_by_gesture = {}

for directory in directories:
    # Pattern matching the trial CSV files of one subject
    pattern = os.path.join('original EMG Data', directory, 'S*_R*_G*.csv')

    # glob() returns matches in arbitrary order; sort so the fused files are
    # reproducible from run to run.
    for file in sorted(glob(pattern)):
        # Gesture token = text between the last underscore and the first
        # following dot of the path.
        g_value = file.split('_')[-1].split('.')[0]
        # EMG files are read as-is (the transpose is only applied to EEG data)
        df = pd.read_csv(file)
        frames_by_gesture.setdefault(g_value, []).append(df)

# Concatenate once per gesture instead of re-copying the growing frame on
# every file (which is quadratic in the number of rows).
grouped_dfs = {
    g_value: pd.concat(frames) for g_value, frames in frames_by_gesture.items()
}

# Create "temp" folder if it doesn't exist
os.makedirs("temp", exist_ok=True)
# Save one fused file per gesture.
# NOTE(review): the file name says "transposed" although nothing is transposed
# here, and the gesture token may already carry its own leading "G" - TODO
# confirm. The name is kept unchanged because later cells may read these paths.
for g, df in grouped_dfs.items():
    df.to_csv(f'temp/fused_transposed_G{g}.csv', index=False)


In [None]:
# import data from csv files into a list of dataframes
def import_data(path, transpose=False):
    """Load every trial CSV of every valid subject into a list of dataframes.

    Subjects are numbered 1..N_SUBJECTS (inclusive); those listed in
    INVALID_SUBJECTS are skipped. Files inside each ``subject_<n>`` folder are
    visited in sorted name order so that the returned list has a deterministic
    order (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        path: Root directory containing the ``subject_<n>`` folders.
        transpose: If True, transpose each dataframe after reading it
            (used for the EEG data).

    Returns:
        A list with one dataframe per file, ordered by subject number and then
        by file name. Columns are renamed to ``channel_1`` .. ``channel_k``.

    Raises:
        FileNotFoundError: If an expected subject folder does not exist.
        AssertionError: If a subject folder contains an entry that is not a
            regular file.
    """
    dataframes = []
    # range() excludes its stop value, hence N_SUBJECTS + 1 to reach the last subject
    valid_subjects_range = [
        i for i in range(1, N_SUBJECTS + 1) if i not in INVALID_SUBJECTS
    ]
    for subject in valid_subjects_range:
        folder = os.path.join(path, f'subject_{subject}')
        for file in sorted(os.listdir(folder)):
            file_path = os.path.join(folder, file)
            assert os.path.isfile(file_path), f"File {file_path} is invalid!"   # ensure that the file exists and are not invalid files/folders here
            # Read once, then transpose only when requested (EEG data)
            dataframe = pd.read_csv(file_path)
            if transpose:
                dataframe = dataframe.transpose()
            # Give every dataframe the same column names ("channel_X"),
            # whatever header the source file had.
            dataframe.columns = [f"channel_{i+1}" for i in range(len(dataframe.columns))]
            dataframes.append(dataframe)

    return dataframes

# Load both modalities: EEG files are transposed on load, EMG files are not.
eeg_data = import_data(path='original EEG Data', transpose=True)
emg_data = import_data(path='original EMG Data')


In [25]:
# Print the structure of the first loaded trial of each modality (EEG, then EMG).
for first_trial in (eeg_data[0], emg_data[0]):
    first_trial.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1349 entries, 0 to 1348
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       1349 non-null   float64
 1   1       1349 non-null   float64
 2   2       1349 non-null   float64
 3   3       1349 non-null   float64
 4   4       1349 non-null   float64
 5   5       1349 non-null   float64
 6   6       1349 non-null   float64
 7   7       1349 non-null   float64
dtypes: float64(8)
memory usage: 94.9+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1592 entries, 0 to 1591
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Channel_1  1592 non-null   int64
 1   Channel_2  1592 non-null   int64
 2   Channel_3  1592 non-null   int64
 3   Channel_4  1592 non-null   int64
 4   Channel_5  1592 non-null   int64
 5   Channel_6  1592 non-null   int64
 6   Channel_7  1592 non-null   int64
 7   Channel_8  1592 non-null   int64
d